"""Download the images linked from an HTML gallery page.

The page is fetched and parsed, and for every entry found in the gallery
a callback is invoked which stores the linked files in a per-entry
directory using ``curl``.
"""
import os
import re
import subprocess

try:
    from urllib.parse import urljoin  # Python 3
except ImportError:
    from urlparse import urljoin  # Python 2

import bs4
import requests


def scrape(_callback, url):
    """Fetch *url*, walk its gallery and report every entry to *_callback*.

    The first ``<div id="gallery">`` of the page is located and each
    ``<ul>`` below it is treated as one entry.  From the first ``<li>``
    of an entry the text of its first ``<b>`` and first ``<i>`` element
    and the targets of all its ``<a href=...>`` links are collected.

    Args:
        _callback: Callable invoked once per entry as
            ``_callback(title, funny, links)`` where ``links`` is a list
            of URLs.
        url: Address of the gallery page.

    Raises:
        IndexError: If the page contains no ``<div id="gallery">``.
        AttributeError: If an entry lacks the expected ``<li>``, ``<b>``
            or ``<i>`` element.
    """
    r = requests.get(url)
    # The response text is always decoded as UTF-8, regardless of the
    # charset announced by the server.
    r.encoding = "utf-8"
    # NOTE(review): no parser is named, so bs4 picks the best one installed;
    # consider pinning one if results must be identical across machines.
    soup = bs4.BeautifulSoup(r.text)
    gallery = soup.find_all("div", id="gallery")[0]
    for note in gallery.find_all("ul"):
        inner = note.find("li")
        title = inner.find("b").text
        funny = inner.find("i").text
        # Resolve each link against the page URL so relative hrefs become
        # downloadable; absolute hrefs pass through urljoin unchanged.
        # Anchors without an href attribute are skipped.
        links = [
            urljoin(url, anchor["href"])
            for anchor in inner.find_all("a", href=True)
        ]
        _callback(title, funny, links)


# Original public name of the function above, kept so that existing
# callers keep working.
rape = scrape


def callback(date, funny, pages):
    """Print an entry and download its pages into a directory named after it.

    The directory name is *date* with every character outside
    ``[a-zA-Z0-9]`` removed.  Each URL in *pages* is fetched with ``curl``
    and written to ``<directory>/NN.jpg`` where ``NN`` is the zero-padded
    position of the URL in *pages*.

    Args:
        date: Entry title; printed and used to derive the directory name.
        funny: Entry subtitle; only printed.
        pages: Iterable of URLs to download.

    Raises:
        OSError: If the target directory cannot be created and does not
            already exist.
    """
    print(date)
    print(funny)
    # An empty name would make the files land in the filesystem root
    # ("/00.jpg"), so fall back to a fixed directory name.
    safe = re.sub("[^a-z0-9A-Z]", "", date) or "untitled"
    try:
        os.mkdir(safe)
    except OSError:
        # An already existing directory is fine; anything else (e.g. a
        # permission problem) must not be hidden.
        if not os.path.isdir(safe):
            raise
    for i, page in enumerate(pages):
        target = os.path.join(safe, "%02i.jpg" % i)
        # Pass the URL as a separate argument instead of building a shell
        # command: scraped URLs are untrusted and may contain shell
        # metacharacters such as "&" or ";".
        with open(target, "wb") as out:
            subprocess.call(["curl", page], stdout=out)
        print(page)


scrape(callback, "http://student.if.pw.edu.pl/~patmad/Mech.html")