Skip to content

Instantly share code, notes, and snippets.

@KevinPayravi
Created December 12, 2024 08:41
Show Gist options
  • Save KevinPayravi/fb41beef9176432ff883bf1465f8c569 to your computer and use it in GitHub Desktop.
Save KevinPayravi/fb41beef9176432ff883bf1465f8c569 to your computer and use it in GitHub Desktop.
import time
import logging
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
# Timestamped "<time> - <LEVEL> - <message>" records at INFO and above,
# emitted through the root logger that the rest of the script shares.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
logging.basicConfig(format=LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger()
# Every session page lives under the same meeting-app endpoint; only the
# trailing numeric session ID differs.
SESSION_URL_PREFIX = "https://aaas.confex.com/aaas/2025/meetingapp.cgi/Session/"

# Session IDs in the order they are scraped.
SESSION_IDS = (
    35359, 34815, 34816, 35145, 35143, 35139, 35152, 35358,
    35280, 35136, 35002, 35001, 35141, 35387, 35511, 35370,
    34211, 34402, 34445, 33902, 34464, 34160, 34411, 34340,
    34392, 33981, 34278, 33947, 34323, 34320, 35153, 35133,
    34187, 34389, 34024, 33992, 34141, 34368, 34013, 34451,
    34277, 34076, 34468, 34516, 34495, 35315, 35441, 35091,
    35093, 35560, 35501, 35643, 34814, 34419, 33822, 34379,
    34287, 34016, 34467, 33970, 34388, 34456, 34378, 34303,
    34817, 34466, 34565, 33892, 35078, 34819, 34327, 34194,
    34317, 34422, 34390, 34315, 34202, 34407, 34294, 34497,
    34450, 34434, 34432, 34263, 35270, 34138, 34100, 34469,
    33934, 34236, 34354, 34252, 33928, 34427, 34491, 34439,
    35313, 34463, 34000, 34510, 34169, 35148, 35379, 34873,
    34870, 35121,
)

# Full session-page URLs, one per ID above.
urls = [f"{SESSION_URL_PREFIX}{session_id}" for session_id in SESSION_IDS]
# Seconds to pause after navigation so the page can finish rendering before
# its HTML is captured.
PAGE_LOAD_WAIT_SECONDS = 10

OUTPUT_PATH = "session_people_with_titles.xlsx"


def _text_or_na(node, selector):
    """Return the stripped text of the first match for *selector*, or "N/A"."""
    match = node.select_one(selector)
    return match.text.strip() if match else "N/A"


def _person_title(person):
    """Return the title text associated with a person link, or "N/A".

    The title is read from the first ``<ul>`` inside the next
    ``span.roleAffiliation`` that follows the link in document order.
    """
    # NOTE(review): find_next() searches forward through the rest of the
    # document, so a person without a roleAffiliation span of their own could
    # pick up a later person's span - confirm against the page markup.
    role_affiliation = person.find_next("span", class_="roleAffiliation")
    title_list = role_affiliation.find("ul") if role_affiliation else None
    return title_list.text.strip() if title_list else "N/A"


def _extract_session_rows(html_content):
    """Parse one session page and return one row dict per listed person.

    Returns an empty list when the page has no ``.PersonList a`` links.
    Missing title/date/time elements are reported as "N/A".
    """
    soup = BeautifulSoup(html_content, "html.parser")
    session_title = _text_or_na(soup, "p.favoriteItem")
    session_date = _text_or_na(soup, "p.SlotDate .defaultTZ")
    session_time = _text_or_na(soup, "p.SlotTime .defaultTZ")
    return [
        {
            "person_name": person.text.strip(),
            "person_title": _person_title(person),
            "person_href": person.get("href", ""),
            "session_title": session_title,
            "session_date": session_date,
            "session_time": session_time,
        }
        for person in soup.select(".PersonList a")
    ]


results = []

# Set up the Selenium WebDriver w/ Chrome.
# Headless mode is requested through a browser argument: the `headless`
# attribute on Options was deprecated and later removed in Selenium 4, and on
# releases without it a plain attribute assignment is silently ignored.
options = Options()
options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

try:
    for url in urls:
        try:
            logger.info(f"Processing URL: {url}")
            driver.get(url)
            # Fixed pause to let the page load fully before reading its source.
            time.sleep(PAGE_LOAD_WAIT_SECONDS)
            html_content = driver.page_source
            logger.debug(f"HTML content for {url}: {html_content[:500]}...")

            session_rows = _extract_session_rows(html_content)
            if not session_rows:
                logger.warning(f"No people found on session page: {url}")
                continue

            # Rows are added only once the whole page parsed, so a failure
            # midway never leaves a partially recorded session.
            results.extend(session_rows)
            logger.info(f"Successfully processed session at {url}")
        except Exception as e:
            # Best-effort scrape: record the failure (with traceback) and
            # move on to the next session.
            logger.exception(f"Error occurred while processing {url}: {e}")
finally:
    # Always shut the browser down, even if the run is interrupted.
    driver.quit()

df = pd.DataFrame(results)
df.to_excel(OUTPUT_PATH, index=False)
logger.info("Data has been written to session_people_with_titles.xlsx")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.