# -*- coding: utf-8 -*-
"""Download the image galleries linked from a page under BASE_URL.

The page is expected to contain a ``<div id="gallery">`` holding ``<ul>``
elements. The first ``<li>`` of each ``<ul>`` carries a title in ``<b>``, a
remark in ``<i>`` and one ``<a href=...>`` per image. Every entry is saved
into its own directory, created inside a directory named after the subject
the user types in.

The script runs on both Python 2 and Python 3.
"""
import errno
import os
import re
import subprocess
import sys

BASE_URL = "http://student.if.pw.edu.pl/~patmad/"

# Seconds to wait for the web server before giving up on the page request.
REQUEST_TIMEOUT = 30

try:
    _read_line = raw_input  # Python 2
except NameError:
    _read_line = input  # Python 3


def _ensure_dir(path):
    """Create directory *path*, tolerating only 'it already exists'.

    Raises:
        OSError: if the directory cannot be created, or if *path* exists
            but is not a directory.
    """
    try:
        os.mkdir(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise


def scrape_gallery(_callback, url):
    """Fetch *url* and invoke *_callback* once per gallery entry.

    Args:
        _callback: callable invoked as ``_callback(title, remark, hrefs)``
            where ``title`` is the text of the entry's ``<b>``, ``remark``
            the text of its ``<i>`` and ``hrefs`` the list of ``href``
            values of its ``<a>`` elements, in document order.
        url: address of the page to scrape.

    Raises:
        requests.RequestException: if the page cannot be fetched or the
            server answers with an HTTP error status.
        IndexError: if the page has no ``<div id="gallery">``.
    """
    # Imported here rather than at module level so that the download
    # helpers in this module stay importable when the third-party scraping
    # packages are not installed.
    import bs4
    import requests

    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = "utf-8"

    # Name the parser explicitly: without it bs4 picks whichever parser
    # happens to be installed, so results can differ between machines.
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    gallery = soup.find("div", id="gallery")
    if gallery is None:
        raise IndexError('no <div id="gallery"> found at %s' % url)

    for note in gallery.findAll("ul"):
        inner = note.find("li")
        if inner is None:
            continue
        title = inner.find("b")
        remark = inner.find("i")
        if title is None or remark is None:
            # Not a gallery entry (e.g. a nested or decorative list).
            continue
        # href=True skips anchors that have no href attribute at all.
        links = [anchor["href"] for anchor in inner.findAll("a", href=True)]
        _callback(title.text, remark.text, links)


# Original name of scrape_gallery, kept so existing callers keep working.
rape = scrape_gallery


def callback(date, funny, pages):
    """Save every URL in *pages* into a directory derived from *date*.

    The directory name is *date* with everything except ASCII letters and
    digits removed ("untitled" if nothing is left). Files are named
    ``00.jpg``, ``01.jpg``, ... in the order of *pages*. *date*, *funny* and
    each downloaded URL are echoed to standard output.

    Args:
        date: entry title, used for the directory name.
        funny: entry remark; only printed.
        pages: iterable of image URLs to download with ``curl``.

    Raises:
        OSError: if the target directory or a file cannot be created, or
            if the ``curl`` executable cannot be started.
    """
    print(date)
    print(funny)

    # An empty name would make the files land in the filesystem root.
    safe = re.sub("[^a-z0-9A-Z]", "", date) or "untitled"
    _ensure_dir(safe)

    for i, page in enumerate(pages):
        target = os.path.join(safe, "%02i.jpg" % i)
        # The URL comes from a scraped page, so it is passed as a separate
        # argument and never through a shell; curl's output is captured by
        # handing it the open file as stdout.
        with open(target, "wb") as out:
            status = subprocess.call(["curl", page], stdout=out)
        if status != 0:
            sys.stderr.write(
                "curl exited with status %d for %s\n" % (status, page)
            )
        print(page)


def main():
    """Ask for a subject and a page name, then download that page's gallery.

    The subject names the directory everything is stored in (created in the
    current working directory if needed); the page name is appended to
    BASE_URL to form the address that is scraped.
    """
    subject = _read_line("Subject: ")
    webpage = _read_line("Page name: ")
    _ensure_dir(subject)
    os.chdir(subject)
    scrape_gallery(callback, BASE_URL + webpage)


if __name__ == "__main__":
    main()