# path: root/plugins/scrape.rb
# blob: 715cf89c42c15101f2e21e91d31d43b2d7a10c1b
require 'digest/md5'
require 'net/http'
require 'uri'

require 'data_mapper'
require 'nokogiri'

# Mix EventMachine::IRC::Commands into the top-level object (a top-level
# `include` adds the module to Object), making its methods callable below.
include EventMachine::IRC::Commands

# Configure DataMapper's :default repository from Config[:scrape][:db].
# NOTE(review): presumably a connection URI or options hash — confirm against
# the bot's configuration file.
DataMapper.setup(:default, Config[:scrape][:db])

# Download helper and persistence model used by the link-scraping trigger.
module Scrape
    # Fetches the resource at +uri+ and writes it below
    # Config[:scrape][:basepath], named after the MD5 hex digest of its
    # content plus the file extension found in the URL path.
    #
    # @param uri [String, URI] absolute http(s) URL of the resource
    # @return [String] the stored file name: digest followed by the extension
    #   (no extension is appended when the URL path has none)
    def self.download(uri)
        # The one-argument form of Net::HTTP.get requires a URI object;
        # Kernel#URI accepts both Strings and existing URI instances.
        location = URI(uri)

        # Derive the extension from the path component only, so host names,
        # query strings and path separators can never leak into the file name.
        ext = File.extname(location.path.to_s)

        content = Net::HTTP.get(location)
        name = Digest::MD5.hexdigest(content) + ext

        # Binary mode: the payload is raw bytes (typically an image) and must
        # not go through newline or encoding translation.
        File.open(Config[:scrape][:basepath] + name, 'wb') do |file|
            file.write(content)
        end

        name
    end

    # One URL posted in a channel, as recorded by the PRIVMSG trigger.
    class Link
        include DataMapper::Resource

        property :id,           Serial
        # Stored file name (for downloaded resources) or a digest of the URL,
        # as chosen by the code that creates the record.
        property :url,          String
        # The URL exactly as it appeared in the message.
        property :original_url, String
        property :created_at,   DateTime
        # Nickname of the user who posted the link.
        property :nick,         String
    end
end

# Runs after Scrape::Link has been declared so that the model is known to
# DataMapper. NOTE(review): auto_upgrade! is expected to bring the database
# schema in line with the declared models without dropping data — confirm
# against the DataMapper version in use.
DataMapper.auto_upgrade!

# PRIVMSG trigger: records every http(s) URL found in a message as a
# Scrape::Link and replies to the message target with the collected page
# titles.
#
# A word matching /notitle/ stops processing at that word and suppresses the
# reply; links seen earlier in the same message have already been recorded.
# The handler evaluates to 0 when it bails out early.
#
# NOTE(review): network and parse errors are not rescued here and propagate
# to whatever invokes the trigger.
Client.register_trigger("PRIVMSG") do |msg|
    text = msg[:params][1]
    # `next` leaves the handler whether it is invoked as a proc or a lambda;
    # `return` inside a stored block can raise LocalJumpError.
    next 0 if text.nil?

    titles = []
    suppressed = false

    text.split.each do |word|
        if word =~ /notitle/
            suppressed = true
            break
        end
        next unless word =~ /^https?:\/\//

        original_url = word
        # 4cdn resources are mirrored locally; everything else is identified
        # by the digest of its URL.
        if word =~ /4cdn[.]org/
            url = Scrape::download(word)
        else
            url = Digest::MD5.hexdigest(word)
        end

        # The one-argument form of Net::HTTP.get requires a URI object.
        title = Nokogiri::HTML(Net::HTTP.get(URI(word))).title
        # Collapse runs of whitespace so the title fits on one IRC line.
        title = title.gsub(/[\s]+/, " ").strip unless title.nil?

        Scrape::Link.create(
            :url            => url,
            :original_url   => original_url,
            :created_at     => Time.now,
            :nick           => msg[:prefix].split('!').first
        )

        # Pages without a <title> yield nil; skip those and empty titles.
        next if title.nil? || title.empty?
        titles << title if title !~ /Let me google that/
    end

    next 0 if suppressed

    Client.privmsg(msg[:params][0], Config[:scrape][:title_prefix] + titles.join(' ')) unless titles.empty?
end