Scraping web data

Here’s the code we ended up with in class:

from bs4 import BeautifulSoup
import requests

# Fetch the assignments page and parse it.  BeautifulSoup expects markup
# (a string or file-like object), not a requests.Response, so the response
# body is passed explicitly via `.text`.
assignments_url = "http://cs.brown.edu/courses/csci0112/fall-2020/assignments.html"
_assignments_response = requests.get(assignments_url)
# Fail loudly on an HTTP error instead of parsing an error page.
_assignments_response.raise_for_status()
assignments_page = BeautifulSoup(_assignments_response.text, features='html.parser')


def scrape_homeworks(page: BeautifulSoup) -> dict:
    """Build a dict from the rows of the first table on the page.

    Only the rows (``tr``) inside the first ``tbody`` of the first ``table``
    are considered.  For each row, the text of its second ``td`` becomes the
    key and the text of its fourth ``td`` becomes the value.

    Raises:
        IndexError: if the page has no table, the table has no tbody, or a
            row has fewer than four ``td`` cells.
    """
    first_table = page.find_all('table')[0]
    table_body = first_table.find_all('tbody')[0]

    result = {}
    for table_row in table_body.find_all('tr'):
        key_text = table_row.find_all('td')[1].text
        value_text = table_row.find_all('td')[3].text
        # Later rows with the same key overwrite earlier ones.
        result[key_text] = value_text
    return result

## staff names

# Fetch the staff page and parse it.  The request must use staff_url (not
# the assignments URL), and BeautifulSoup needs the response body (`.text`),
# not the requests.Response object itself.
staff_url = "http://cs.brown.edu/courses/csci0112/fall-2020/staff.html"
_staff_response = requests.get(staff_url)
# Fail loudly on an HTTP error instead of parsing an error page.
_staff_response.raise_for_status()
staff_page = BeautifulSoup(_staff_response.text, features='html.parser')

def scrape_staff_names(page: BeautifulSoup) -> list:
    """Return the stripped text of the ``span`` inside each ``strong`` tag.

    Every ``strong`` element on the page is examined in document order; the
    text of its first ``span`` descendant, with surrounding whitespace
    removed, is collected.  ``strong`` elements that contain no ``span`` are
    skipped.

    NOTE(review): presumably each such span holds one staff member's name on
    the staff page -- confirm against the page markup.
    """
    # find() returns a Tag (or None), which has no usable .strip(); the
    # string content lives in .text, so strip that instead.
    spans = (strong.find('span') for strong in page.find_all('strong'))
    return [span.text.strip() for span in spans if span is not None]