nooz/noterapist.py~

38 lines
851 B
Python

import requests
import bs4
def rape(_callback, url, timeout=30):
    """Fetch the gallery page at *url* and report each entry to *_callback*.

    The page is expected to contain a ``<div id="gallery">`` holding one
    ``<ul>`` per entry. For every ``<ul>``, the first ``<li>`` is inspected
    and ``_callback(title, funny, links)`` is invoked, where:

    * ``title`` is the text of the first ``<b>`` inside that ``<li>``,
    * ``funny`` is the text of the first ``<i>`` inside that ``<li>``,
    * ``links`` is the list of ``href`` values of the ``<a>`` tags in it.

    Entries lacking an ``<li>``, ``<b>`` or ``<i>`` are skipped, and anchors
    without an ``href`` attribute are ignored.

    Args:
        _callback: Callable taking ``(title, funny, links)``.
        url: Address of the gallery page.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the script forever.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        IndexError: The page has no ``<div id="gallery">``.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing the error page.
    r.raise_for_status()
    # Decode the body as UTF-8 regardless of what the response headers say.
    r.encoding = "utf-8"
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(r.text, "html.parser")

    gallery = soup.find("div", id="gallery")
    if gallery is None:
        # Same exception type the old ``findAll(...)[0]`` produced, but with
        # a message that says what is actually wrong.
        raise IndexError('no <div id="gallery"> found at %s' % url)

    for note in gallery.findAll("ul"):
        inner = note.find("li")
        if inner is None:
            continue
        title = inner.find("b")
        funny = inner.find("i")
        if title is None or funny is None:
            continue
        # href=True filters out anchors that have no href attribute.
        links = [a["href"] for a in inner.findAll("a", href=True)]
        _callback(title.text, funny.text, links)
import os
import re
def callback(date, funny, pages):
    """Print one gallery entry and download each of its pages with curl.

    A directory named after *date* (reduced to ASCII letters and digits) is
    created in the current working directory, and page number ``i`` is saved
    there as ``NN.jpg`` (zero-padded index, starting at ``00``).

    Args:
        date: Entry title; printed and used to derive the directory name.
        funny: Entry caption; printed only.
        pages: Iterable of URLs to download, in order.

    Raises:
        OSError: The target directory could not be created and does not
            already exist.
    """
    # Local import: the file's import section is left untouched.
    import subprocess

    # Single-argument print() behaves the same under Python 2 and Python 3.
    print(date)
    print(funny)

    # Strip everything except ASCII letters and digits. Fall back to a fixed
    # name so an all-punctuation title cannot yield an empty directory name
    # (which previously made the download target "/00.jpg").
    safe = re.sub("[^a-z0-9A-Z]", "", date) or "untitled"
    try:
        os.mkdir(safe)
    except OSError:
        # An already-existing directory is fine (e.g. on a re-run); any other
        # failure would make every download below fail, so surface it.
        if not os.path.isdir(safe):
            raise

    for i, page in enumerate(pages):
        target = os.path.join(safe, "%02i.jpg" % i)
        # Run curl with an argument list and no shell, so characters in the
        # scraped URL cannot be interpreted as shell syntax. "--url" makes
        # curl treat the value as a URL even if it starts with a dash.
        with open(target, "wb") as out:
            subprocess.call(["curl", "--url", page], stdout=out)
        print(page)
# Script entry: scrape the Mech gallery page and hand each entry to callback().
rape(callback, url="http://student.if.pw.edu.pl/~patmad/Mech.html")