Created
December 12, 2024 08:41
-
-
Save KevinPayravi/fb41beef9176432ff883bf1465f8c569 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Configure the root logger once for the whole script: INFO and above,
# each record prefixed with a timestamp and its level name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
# The script logs through the root logger (no name given).
logger = logging.getLogger()
# Every session page shares this prefix; only the trailing numeric ID differs.
SESSION_URL_BASE = "https://aaas.confex.com/aaas/2025/meetingapp.cgi/Session"

# Session IDs to scrape, in the order they are visited.
SESSION_IDS = (
    35359, 34815, 34816, 35145, 35143, 35139, 35152, 35358,
    35280, 35136, 35002, 35001, 35141, 35387, 35511, 35370,
    34211, 34402, 34445, 33902, 34464, 34160, 34411, 34340,
    34392, 33981, 34278, 33947, 34323, 34320, 35153, 35133,
    34187, 34389, 34024, 33992, 34141, 34368, 34013, 34451,
    34277, 34076, 34468, 34516, 34495, 35315, 35441, 35091,
    35093, 35560, 35501, 35643, 34814, 34419, 33822, 34379,
    34287, 34016, 34467, 33970, 34388, 34456, 34378, 34303,
    34817, 34466, 34565, 33892, 35078, 34819, 34327, 34194,
    34317, 34422, 34390, 34315, 34202, 34407, 34294, 34497,
    34450, 34434, 34432, 34263, 35270, 34138, 34100, 34469,
    33934, 34236, 34354, 34252, 33928, 34427, 34491, 34439,
    35313, 34463, 34000, 34510, 34169, 35148, 35379, 34873,
    34870, 35121,
)

# Full session-page URLs consumed by the scraping loop.
urls = [f"{SESSION_URL_BASE}/{session_id}" for session_id in SESSION_IDS]
# One dict per (person, session) pair; becomes the rows of the output sheet.
results = []

# Fixed pause (seconds) after each navigation so the page can load fully
# before its HTML is read.
PAGE_LOAD_WAIT_SECONDS = 10
OUTPUT_FILE = "session_people_with_titles.xlsx"


def _select_text(soup, selector, default="N/A"):
    """Return the stripped text of the first element matching ``selector``.

    Falls back to ``default`` when nothing in ``soup`` matches.
    """
    node = soup.select_one(selector)
    return node.text.strip() if node else default


def _person_title(person_link):
    """Return the title text that follows a person link, or "N/A" if absent.

    The title is taken from the ``<ul>`` inside the next
    ``span.roleAffiliation`` element after the link.
    """
    role_affiliation = person_link.find_next("span", class_="roleAffiliation")
    title_list = role_affiliation.find("ul") if role_affiliation else None
    return title_list.text.strip() if title_list else "N/A"


# Set up the Selenium WebDriver w/ Chrome.
options = Options()
# Pass the flag explicitly: the ``Options.headless`` setter was deprecated and
# later removed in Selenium 4, where assigning to it silently has no effect.
options.add_argument("--headless")
driver = webdriver.Chrome(options=options)

try:
    for url in urls:
        # A failure on one session page must not abort the remaining pages.
        try:
            logger.info(f"Processing URL: {url}")
            driver.get(url)

            # Wait for the page to load fully
            time.sleep(PAGE_LOAD_WAIT_SECONDS)

            html_content = driver.page_source
            logger.debug(f"HTML content for {url}: {html_content[:500]}...")
            soup = BeautifulSoup(html_content, "html.parser")

            session_title = _select_text(soup, "p.favoriteItem")
            session_date = _select_text(soup, "p.SlotDate .defaultTZ")
            session_time = _select_text(soup, "p.SlotTime .defaultTZ")

            people = soup.select(".PersonList a")
            if not people:
                logger.warning(f"No people found on session page: {url}")
                continue

            for person in people:
                results.append(
                    {
                        "person_name": person.text.strip(),
                        "person_title": _person_title(person),
                        "person_href": person.get("href", ""),
                        "session_title": session_title,
                        "session_date": session_date,
                        "session_time": session_time,
                    }
                )

            logger.info(f"Successfully processed session at {url}")
        except Exception as e:
            # Log with traceback and move on to the next URL.
            logger.exception(f"Error occurred while processing {url}: {e}")

    df = pd.DataFrame(results)
    df.to_excel(OUTPUT_FILE, index=False)
    logger.info(f"Data has been written to {OUTPUT_FILE}")
finally:
    # Always shut the browser down, even if scraping or the export fails.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment