nooz/noterapist.py

# -*- coding: utf-8 -*-
import requests
import bs4

def rape(_callback, url):
    """Scrape the notes gallery at ``url`` and report each note to ``_callback``.

    The page is fetched, decoded as UTF-8 and searched for the element
    ``<div id="gallery">``.  For every ``<ul>`` inside it, the first ``<li>``
    is inspected: the text of its first ``<b>`` and first ``<i>`` plus the
    ``href`` of every link in it are passed on.

    Args:
        _callback: Callable invoked as ``_callback(title, funny, links)`` once
            per note, where ``title`` is the ``<b>`` text, ``funny`` the
            ``<i>`` text and ``links`` a list of ``href`` values.
        url: Address of the page to scrape.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        IndexError: If the page has no ``<div id="gallery">``.
    """
    r = requests.get(url)
    # Fail on 4xx/5xx instead of trying to parse an error page.
    r.raise_for_status()
    r.encoding = "utf-8"
    soup = bs4.BeautifulSoup(r.text)

    gallery = soup.find("div", id="gallery")
    if gallery is None:
        # Same exception type the old ``findAll(...)[0]`` produced, but with
        # a message that says what is actually missing.
        raise IndexError('no <div id="gallery"> found at %s' % url)

    for note in gallery.find_all("ul"):
        inner = note.find("li")
        if inner is None:
            # A list without items carries no note.
            continue
        title = inner.find("b")
        funny = inner.find("i")
        if title is None or funny is None:
            # Not shaped like a note entry; skip it rather than crash on None.
            continue
        # href=True keeps only anchors that really have an href attribute.
        links = [a["href"] for a in inner.find_all("a", href=True)]
        _callback(title.text, funny.text, links)

import os
import re

def callback(date, funny, pages):
    """Print one note's details and download its pages with ``curl``.

    A directory named after ``date`` (with every character other than ASCII
    letters and digits removed) is created in the current working directory.
    Each URL in ``pages`` is saved there as ``00.jpg``, ``01.jpg``, ... in
    iteration order.

    Args:
        date: Title of the note; printed and used to derive the directory name.
        funny: Extra text of the note; only printed.
        pages: Iterable of URLs to download.

    Raises:
        OSError: If the directory cannot be created and does not already
            exist, or if ``curl`` cannot be started.
    """
    # Imported locally so the block stays self-contained in this file.
    import subprocess

    # print() with a single argument behaves the same on Python 2 and 3.
    print(date)
    print(funny)

    # An all-punctuation title would sanitize to "", which made the old shell
    # command write to "/00.jpg"; fall back to a fixed name instead.
    safe = re.sub("[^a-z0-9A-Z]", "", date) or "untitled"
    try:
        os.mkdir(safe)
    except OSError:
        # Only "already exists" is harmless; anything else is a real failure.
        if not os.path.isdir(safe):
            raise

    for i, page in enumerate(pages):
        target = os.path.join(safe, "%02i.jpg" % i)
        # Argument list, no shell: URLs containing '&', '?', ';' or spaces
        # can neither break the command nor inject another one.
        subprocess.call(["curl", "-o", target, page])
        print(page)

def main():
    """Ask for a subject and page name, then download that page's notes.

    A directory named after the subject is created if needed and made the
    current working directory, so everything downloaded ends up inside it.
    The page name is appended to the fixed base URL before scraping.

    Raises:
        OSError: If the subject directory cannot be created or entered.
    """
    # raw_input exists only on Python 2; on Python 3 input() is the equivalent.
    try:
        prompt = raw_input
    except NameError:
        prompt = input

    subject = prompt("Subject: ")
    webpage = prompt("Page name: ")
    try:
        os.mkdir(subject)
    except OSError:
        # An existing directory is fine; report every other failure here
        # rather than as a confusing chdir error one line later.
        if not os.path.isdir(subject):
            raise
    os.chdir(subject)
    rape(callback, "http://student.if.pw.edu.pl/~patmad/" + webpage)

# Start the interactive downloader only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
More notes and some helpful scripts 2013-01-19 11:05:28 +00:00

```python
# -*- coding: utf-8 -*-
import requests
import bs4

def rape(_callback, url):
    r = requests.get(url)
    r.encoding = "utf-8"
    soup = bs4.BeautifulSoup(r.text)
    gallery = soup.findAll("div", id="gallery")[0]
    notes = gallery.findAll("ul")
    for note in notes:
        inner = note.find("li")
        title = inner.find("b").text
        funny = inner.find("i").text
        _callback(title, funny, [c["href"] for c in inner.findAll("a")])

import os
import re

def callback(date, funny, pages):
    print date
    print funny
    safe = re.sub("[^a-z0-9A-Z]", "", date)
    try:
        os.mkdir(safe)
    except OSError:
        pass
    for i, page in enumerate(pages):
        os.system("curl %s > %s/%02i.jpg" % (page, safe, i))
        print page

def main():
    subject = raw_input("Subject: ")
    webpage = raw_input("Page name: ")
    try:
        os.mkdir(subject)
    except OSError:
        pass
    os.chdir(subject)
    rape(callback, "http://student.if.pw.edu.pl/~patmad/" + webpage)

if __name__ == "__main__":
    main()
```