This version computes everything before returning a list.
def cubes(n):
    """Return the first n cubes of natural numbers"""
    L = []
    for x in range(1,n+1):
        L.append(x**3)
    return L
This version returns immediately and gives a generator, which produces the items as needed for iteration.
def cubesgen(n):
    """Return the first n cubes of natural numbers, lazily"""
    for x in range(1,n+1):
        yield x**3
cubes(5)      # returns the full list [1, 8, 27, 64, 125] immediately
cubesgen(5)   # returns a generator object; no cubes have been computed yet
for y in cubes(5):
    print("Here is a cube:",y)
Generators are most often used as iterables directly in for loops:
for y in cubesgen(5): # MOST COMMON
    print("Here is a cube:",y)
But you can also save the return value and request items manually:
g = cubesgen(5) # MUCH LESS COMMON
next(g) # now we can single-step the generator object until exhausted
next(g)
next(g)
next(g)
next(g)
next(g) # this sixth call raises StopIteration: the generator of 5 cubes is exhausted
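If you step past the last item, the generator raises StopIteration. Here is a small sketch (not part of the original notebook) of two ways to handle exhaustion:
g = cubesgen(2)
next(g)                    # 1
next(g)                    # 8
try:
    next(g)                # the generator is exhausted
except StopIteration:
    print("No more cubes")
next(cubesgen(0), None)    # giving next() a default value suppresses StopIteration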
def allcubes():
    """Return the cubes of the natural numbers, lazily"""
    x = 1
    while True:
        yield x**3
        x += 1
# Print all the cubes with at most 4 digits
for y in allcubes():
    if len(str(y))>4:
        break # Essential to have a break, as this iterable is infinite!
    print(y)
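As an aside (not part of the lecture code), itertools.islice is another way to take a finite prefix of an infinite generator, and it also produces its items lazily:
from itertools import islice

# Print the first 10 cubes; islice stops requesting items after 10
for y in islice(allcubes(), 10):
    print(y)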
from bs4 import BeautifulSoup
import os
def links_in_html_doc(fn):
    """Return the destinations of http(s) links in an HTML file"""
    with open(fn) as infile:
        soup = BeautifulSoup(infile, "html.parser")  # name a parser explicitly to avoid a bs4 warning
    for atag in soup.find_all("a"):
        url = atag.get("href", "")   # .get avoids a KeyError for <a> tags with no href
        if url.startswith("http"):
            yield url
def links_in_html_dir(dirname):
    """Return the destinations of http(s) links in all HTML files
    in the directory specified by `dirname`"""
    for fn in os.listdir(dirname):
        if not fn.endswith(".html"):
            continue
        yield from links_in_html_doc(os.path.join(dirname,fn))
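The yield from statement delegates to another generator: it passes along every item that links_in_html_doc produces, one at a time. A rough equivalent written as an explicit loop (a sketch for illustration; the function name here is made up):
def links_in_html_dir_explicit(dirname):
    """Same behavior as links_in_html_dir, with the delegation spelled out"""
    for fn in os.listdir(dirname):
        if not fn.endswith(".html"):
            continue
        for url in links_in_html_doc(os.path.join(dirname, fn)):
            yield url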
# Just print all the links in all the HTML files in one directory
# You'll need to set this to a directory containing some HTML files
# In the course sample code repo, this relative path contains the
# HTML slide presentations for several of our lectures.
HTMLDIR = "web/html-for-scraping"
for link in links_in_html_dir(HTMLDIR):
    print(link)
# Make a histogram showing most common link destinations
# (assume HTMLDIR is set, as in previous cell)
from collections import defaultdict
hist = defaultdict(int)
for link in links_in_html_dir(HTMLDIR):
    hist[link] += 1
for link,count in sorted(hist.items(),key=lambda pair:-pair[1]):
    print("Appears",count,"times:",link)
[ x**3 for x in range(5) ]   # list comprehension: builds the whole list at once
( x**3 for x in range(5) )   # generator comprehension: produces items lazily
sum( [ x**3 for x in range(10_000_000) ] ) # uses lots of memory
sum( x**3 for x in range(10_000_000) ) # uses very little memory
Generator comprehensions are especially nice when combined with aggregating functions like any or all, which may terminate early. For example, any( GENERATOR_COMPREHENSION ) will evaluate to True as soon as the generator yields its first truthy value; subsequent values are not computed. In contrast, any( [ LIST_COMPREHENSION ] ) will always generate the entire list before searching for the first truthy value.
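Here is a small sketch of that difference (the divisibility test is just an illustrative predicate):
# Generator comprehension: any() stops at the first truthy value,
# here x = 7 (since 7**3 = 343 is divisible by 7), so only a few cubes are ever computed.
any(x**3 % 7 == 0 for x in range(1, 10_000_000))

# List comprehension: all ten million cubes are computed and stored
# before any() starts looking for a truthy value.
any([x**3 % 7 == 0 for x in range(1, 10_000_000)])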