repost/plugins/scrape.rb

64 lines
1.7 KiB
Ruby
Raw Normal View History

2014-03-09 18:40:32 +00:00
require 'open-uri'
require 'digest/md5'
2014-03-10 08:01:37 +00:00
require 'data_mapper'
2014-03-13 13:45:00 +00:00
require 'nokogiri'
2014-03-09 18:40:32 +00:00
2014-03-09 18:00:52 +00:00
# Top-level include: mixes EventMachine::IRC::Commands into Object, so its
# methods can be called without an explicit receiver anywhere in the plugin.
# NOTE(review): nothing in this file visibly calls those methods bare (the
# trigger goes through Client.privmsg) — confirm the include is still needed.
include EventMachine::IRC::Commands
2014-03-10 07:40:39 +00:00
# Point DataMapper's :default repository at the connection configured under
# Config[:scrape][:db]. Runs once, when the plugin file is loaded.
DataMapper.setup(:default, Config[:scrape][:db])
module Scrape
2014-03-10 08:11:56 +00:00
# Fetches +uri+ and stores the response body in the directory given by
# Config[:scrape][:basepath]. The file is named after the MD5 hex digest of
# the content, followed by the extension of the URI's path (if it has one).
#
# @param uri [String] http(s) URL to fetch
# @return [String] the stored file name (digest + extension), relative to
#   Config[:scrape][:basepath]
# @raise [URI::InvalidURIError] if +uri+ cannot be parsed
def self.download(uri)
  # Take the extension from the path only, so the host name, query string or
  # a later path segment can never leak into the file name. Yields '' when
  # the path has no extension, which keeps the concatenations below safe.
  ext = File.extname(URI.parse(uri).path.to_s)

  # URI.open (from open-uri) works on Ruby 3.0+, where Kernel#open no longer
  # accepts URLs; the block form closes the handle after reading.
  content = URI.open(uri, &:read)
  md5 = Digest::MD5.hexdigest(content)

  # Downloads are typically images, so write the bytes untranslated.
  File.binwrite(Config[:scrape][:basepath] + md5 + ext, content)
  md5 + ext
end
# DataMapper model holding one row per link seen by the PRIVMSG trigger.
# NOTE(review): the String properties carry no explicit :length, so the
# DataMapper default applies — confirm it is long enough for real-world URLs.
class Link
include DataMapper::Resource
# Auto-incrementing primary key.
property :id, Serial
# For 4cdn.org links: the file name returned by Scrape.download.
# For every other link: the MD5 hex digest of the posted URL.
property :url, String
# The URL exactly as it appeared in the message.
property :original_url, String
# Set by the trigger to Time.now when the record is created.
property :created_at, DateTime
# Sender's nick: the part of the message prefix before the first '!'.
property :nick, String
end
end
# Bring the database schema in line with the models declared above
# (Scrape::Link). Runs once, when the plugin file is loaded.
# NOTE(review): auto_upgrade! is expected to be non-destructive (creates
# missing tables/columns only) — confirm for the DataMapper version in use.
DataMapper.auto_upgrade!
2014-03-09 18:59:42 +00:00
# PRIVMSG trigger: stores every http(s) link found in a message as a
# Scrape::Link and announces the linked pages' titles to the message target.
#
# Reads msg[:params][0] as the reply target, msg[:params][1] as the message
# text and msg[:prefix] as the sender (the nick is the part before '!').
#
# A word matching /notitle/ stops processing at that word: links that came
# before it have already been stored, nothing after it is looked at, and no
# titles are announced. The block then evaluates to 0, as it does for a
# message without text.
Client.register_trigger("PRIVMSG") do |msg|
  text = msg[:params][1]
  # `next`, not `return`: `return` inside a block raises LocalJumpError when
  # the block is invoked as a plain proc, while `next` works for procs and
  # lambdas alike.
  next 0 if text.nil?

  titles = []
  muted = false

  text.split.each do |word|
    if word =~ /notitle/
      muted = true
      break
    end
    next unless word =~ %r{\Ahttps?://}

    # 4cdn.org links are mirrored locally and referenced by stored file name;
    # everything else is referenced by the digest of its URL.
    stored_as = if word =~ /4cdn[.]org/
                  Scrape.download(word)
                else
                  Digest::MD5.hexdigest(word)
                end

    # Only the first 10 KiB are read when looking for <title>. URI.open works
    # on Ruby 3.0+ and the block form closes the handle.
    head = URI.open(word) { |page| page.read(10240) }
    title = Nokogiri::HTML(head.to_s).title

    Scrape::Link.create(
      :url => stored_as,
      :original_url => word,
      :created_at => Time.now,
      :nick => msg[:prefix].split('!').first
    )

    # Pages without a <title> (e.g. images) yield nil and are skipped.
    # Non-bang delete always returns a string; CR and LF are both stripped
    # because the title is remote input that ends up in an IRC line.
    next if title.nil?
    cleaned = title.delete("\r\n")
    titles << cleaned unless cleaned.empty?
  end

  next 0 if muted

  unless titles.empty?
    Client.privmsg(msg[:params][0], Config[:scrape][:title_prefix] + titles.join(' '))
  end
end