from bs4 import BeautifulSoup
from urllib.request import urlopen
import csv
import os
# Web page that is downloaded when no local cached copy exists.
PAGEURL = "https://mscs.uic.edu/graduate/current-students/advising-and-registration/graduate-courses/"
# Local file holding the raw bytes of the downloaded page, reused on later runs.
CACHEFILE = "toscrape.html"
def get_page_soup():
    """Return the course page parsed as a BeautifulSoup tree.

    If CACHEFILE exists it is parsed directly. Otherwise the page is
    downloaded from PAGEURL, its raw bytes are saved to CACHEFILE, and the
    downloaded bytes are parsed.

    Returns:
        A BeautifulSoup object built with the "html.parser" backend.
    """
    if os.path.exists(CACHEFILE):
        with open(CACHEFILE, "rb") as fp:
            return BeautifulSoup(fp, "html.parser")

    print("No cached copy; retrieving '{}'".format(PAGEURL))
    with urlopen(PAGEURL) as res:
        data = res.read()
    # Save the raw bytes so later runs do not hit the network again.
    with open(CACHEFILE, "wb") as fp:
        fp.write(data)
    # Parse the bytes just downloaded (the original referenced an undefined
    # name here, which raised NameError on every cache miss).
    return BeautifulSoup(data, "html.parser")
soup = get_page_soup()

# First let's find the table tag we want, for Fall 2022.
# Start from None so a page without the expected heading is detected
# explicitly instead of surfacing later as a NameError.
table = None
for section in soup.find_all("section"):
    h2 = section.find("h2", id="fall-2022-graduate-courses")
    if h2:
        # This is the right section, so let's get its first table
        # (find() returns None when the section contains no table).
        table = section.find("table")
        # No need to look further, so let's break out of the loop
        break

# Check before opening the output file so a failed lookup does not leave
# behind an empty, truncated CSV.
if table is None:
    raise SystemExit("Could not find the Fall 2022 course table in the page")

nrows = 0
with open("fall2022courses.csv", "w", newline="", encoding="UTF-8") as fp:
    writer = csv.writer(fp)
    for row in table.find_all("tr"):
        # Header cells (th) and data cells (td) are both kept; newlines
        # inside a cell are flattened to spaces so each cell stays on one line.
        # Produces a list like ["Course","Description",...]
        row_cells = [
            cell.text.replace("\n", " ")
            for cell in row.find_all(["th", "td"])
        ]
        writer.writerow(row_cells)
        nrows += 1

print("Done, processed {} rows".format(nrows))