A document from MCS 275 Spring 2022, instructor Emily Dumas. You can also get the notebook file.

Scrape the table of upcoming UIC math grad courses

In [1]:
from bs4 import BeautifulSoup
from urllib.request import urlopen
import csv
import os
In [2]:
# URL of the web page that get_page_soup() downloads when no cached copy exists
PAGEURL = "https://mscs.uic.edu/graduate/current-students/advising-and-registration/graduate-courses/"
# Local file where the downloaded HTML is stored, so later runs can skip the download
CACHEFILE = "toscrape.html"
In [3]:
def get_page_soup():
    """
    Return a BeautifulSoup object for the page at PAGEURL.

    If the file CACHEFILE exists, it is parsed and no network request is
    made.  Otherwise the page is downloaded with urlopen, its raw bytes are
    saved to CACHEFILE for future calls, and the downloaded data is parsed.
    """
    if os.path.exists(CACHEFILE):
        with open(CACHEFILE,"rb") as fp:
            return BeautifulSoup(fp,"html.parser")

    print("No cached copy; retrieving '{}'".format(PAGEURL))
    # Read the whole response first so the connection is closed before
    # we touch the filesystem.
    with urlopen(PAGEURL) as res:
        data = res.read()
    with open(CACHEFILE,"wb") as fp:
        fp.write(data)
    # Parse the bytes we just downloaded.  (Previously this referenced an
    # undefined name, so the first uncached run raised NameError.)
    return BeautifulSoup(data,"html.parser")
In [4]:
# Parse the course listing page once (read from CACHEFILE if it exists, downloaded otherwise)
soup = get_page_soup()
In [5]:
# First let's find the table tag we want, for Fall 2022

# Start from None so that a failed search is detectable here, and so a
# value of `table` left over from an earlier run of this cell is never reused.
table = None
for section in soup.find_all("section"):
    h2 = section.find("h2",id="fall-2022-graduate-courses")
    if h2:
        # This is the right section, so let's get its first table
        # (find returns None if the section contains no table)
        table = section.find("table")
        # No need to look further, so let's break out of the loop
        break

if table is None:
    # Fail now with a clear message instead of a NameError/AttributeError
    # in a later cell (e.g. if the page layout or heading id has changed).
    raise RuntimeError(
        "Could not find a table in a section with an h2 of id 'fall-2022-graduate-courses'"
    )
In [6]:
# Copy every row of the table (header and data rows alike) into a CSV file,
# keeping count of how many rows were written.
nrows = 0
with open("fall2022courses.csv","w",newline="",encoding="UTF-8") as csv_file:
    csv_out = csv.writer(csv_file)
    for nrows, tr in enumerate(table.find_all("tr"), start=1):
        # Text of each header/data cell, with line breaks flattened to spaces,
        # e.g. a list like ["Course","Description",...]
        csv_out.writerow(
            [entry.text.replace("\n"," ") for entry in tr.find_all(["th", "td"])]
        )
print("Done, processed {} rows".format(nrows))
Done, processed 19 rows