43 lines
1.1 KiB
Python
43 lines
1.1 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
import requests
|
||
|
import bs4
|
||
|
|
||
|
def rape(_callback, url):
    """Scrape the gallery page at *url* and report each entry to *_callback*.

    The page is fetched, decoded as UTF-8 and searched for the element
    ``<div id="gallery">``.  For every ``<ul>`` inside it, the first ``<li>``
    supplies the title (text of its first ``<b>``), the caption (text of its
    first ``<i>``) and the ``href`` of every link it contains.

    Args:
        _callback: callable invoked as ``_callback(title, caption, hrefs)``
            once per entry; ``hrefs`` is a list of link targets exactly as
            they are written in the page.
        url: address of the page to fetch.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an error status.
        IndexError: if the page contains no ``<div id="gallery">``.

    NOTE(review): the function name is kept only so existing callers keep
    working; consider renaming it (e.g. ``scrape``) in a follow-up.
    """
    # A timeout keeps the script from hanging forever on a dead server.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    response.encoding = "utf-8"

    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(response.text, "html.parser")

    gallery = soup.find("div", id="gallery")
    if gallery is None:
        # Same exception type the former ``findAll(...)[0]`` produced, but
        # with a message that says what is actually wrong.
        raise IndexError('no <div id="gallery"> found at %s' % url)

    for note in gallery.find_all("ul"):
        inner = note.find("li")
        if inner is None:
            continue
        title = inner.find("b")
        funny = inner.find("i")
        if title is None or funny is None:
            # Not shaped like a gallery entry; skip it instead of crashing.
            continue
        # href=True drops anchors that have no target at all.
        links = [anchor["href"] for anchor in inner.find_all("a", href=True)]
        _callback(title.text, funny.text, links)
|
||
|
|
||
|
import os
|
||
|
import re
|
||
|
|
||
|
def callback(date, funny, pages):
    """Print one gallery entry and download its pages with ``curl``.

    A directory named after *date* (with everything except ASCII letters and
    digits removed) is created in the current working directory, and each
    URL in *pages* is saved there as ``00.jpg``, ``01.jpg``, ...

    Args:
        date: entry title; printed and used to derive the directory name.
        funny: entry caption; printed only.
        pages: iterable of URLs to download, in order.

    Raises:
        OSError: if the target directory cannot be created and does not
            already exist.
    """
    # Function-scope import so this block does not rely on a file-level one.
    import subprocess

    print(date)
    print(funny)

    # Fall back to a fixed name: an empty directory name would make the
    # downloads target "/00.jpg" at the filesystem root.
    safe = re.sub("[^a-z0-9A-Z]", "", date) or "untitled"
    try:
        os.mkdir(safe)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # a regular file in the way, ...) must not be silently ignored.
        if not os.path.isdir(safe):
            raise

    for i, page in enumerate(pages):
        target = os.path.join(safe, "%02i.jpg" % i)
        # Pass the URL as a single argv element instead of through a shell:
        # scraped hrefs are untrusted and could otherwise inject commands.
        with open(target, "wb") as out:
            subprocess.call(["curl", page], stdout=out)
        print(page)
|
||
|
|
||
|
def main():
    """Prompt for a subject and a page name, then download that gallery.

    The subject is used as a directory name: the directory is created if
    possible, the process changes into it, and the gallery page named by the
    second prompt is scraped with ``callback`` handling every entry.
    """
    folder = raw_input("Subject: ")
    page_name = raw_input("Page name: ")

    try:
        os.mkdir(folder)
    except OSError:
        # Ignored on purpose: if the directory is unusable, the os.chdir
        # call right below raises anyway.
        pass
    os.chdir(folder)

    target_url = "http://student.if.pw.edu.pl/~patmad/" + page_name
    rape(callback, target_url)
|
||
|
|
||
|
# Start the interactive downloader only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
|