Scraping web data
Here’s the code we ended up with in class:
from bs4 import BeautifulSoup
import requests

## homework assignments
assignments_url = "http://cs.brown.edu/courses/csci0112/fall-2020/assignments.html"
# BeautifulSoup parses markup, so hand it the response body (.text), not the
# requests.Response object that requests.get() returns.
assignments_page = BeautifulSoup(requests.get(assignments_url).text,
                                 features='html.parser')


def scrape_homeworks(page: BeautifulSoup) -> dict:
    """Build a dict from the rows of the first table on ``page``.

    Looks at every ``<tr>`` inside the first ``<tbody>`` of the first
    ``<table>``. For each row, the text of the second ``<td>`` becomes the
    key and the text of the fourth ``<td>`` becomes the value.
    (Presumably these are the assignment name and its due date -- confirm
    against the live page.)

    Args:
        page: The parsed assignments page.

    Returns:
        A dict mapping second-column text to fourth-column text. If two rows
        share the same second-column text, the later row wins.

    Raises:
        IndexError: If the page has no ``<table>``/``<tbody>``, or a row has
            fewer than four ``<td>`` cells.
    """
    homework_rows = page.find_all('table')[0].find_all('tbody')[0].find_all('tr')
    homework_assignments = {}
    for row in homework_rows:
        # Look the cells up once per row instead of once per column.
        cells = row.find_all('td')
        homework_assignments[cells[1].text] = cells[3].text
    return homework_assignments


## staff names
staff_url = "http://cs.brown.edu/courses/csci0112/fall-2020/staff.html"
# Fetch the staff page itself (staff_url), not the assignments page.
staff_page = BeautifulSoup(requests.get(staff_url).text,
                           features='html.parser')


def scrape_staff_names(page: BeautifulSoup) -> list:
    """Collect the stripped ``<span>`` text found inside each ``<strong>`` tag.

    Args:
        page: The parsed staff page.

    Returns:
        A list with one whitespace-stripped string per ``<strong>`` element,
        in document order.

    Raises:
        AttributeError: If some ``<strong>`` element contains no ``<span>``
            (``find`` returns ``None`` in that case).
    """
    # .find() returns a Tag, which has no string methods; take its .text
    # before stripping whitespace.
    names = [strong.find('span').text.strip() for strong in page.find_all('strong')]
    return names