repost/plugins/scrape.rb

64 lines
1.8 KiB
Ruby

require 'digest/md5'
require 'net/http'
require 'uri'
require 'data_mapper'
require 'nokogiri'
# Top-level include: mixes EventMachine::IRC::Commands into Object, so the
# module's methods become callable without a receiver throughout the program.
include EventMachine::IRC::Commands
# Configure DataMapper's :default repository with the connection settings
# stored under Config[:scrape][:db].
DataMapper.setup(:default, Config[:scrape][:db])
# Download helper and persistence model used by the PRIVMSG scrape trigger.
module Scrape
  # Fetches +uri+ over HTTP and writes the response body to a file below
  # Config[:scrape][:basepath]. The file is named after the MD5 hex digest of
  # the body, followed by the extension of the URL's path (if any).
  #
  # @param uri [String, URI] absolute HTTP(S) URL to download
  # @return [String] the stored file name (digest + extension), without basepath
  # @raise [URI::InvalidURIError] if +uri+ cannot be parsed as a URI
  def self.download(uri)
    # Net::HTTP.get needs a URI object when called with a single argument.
    target = URI(uri.to_s)
    # Derive the extension from the path component only, so the host name,
    # query string or a '/' can never leak into the file name. Yields "" when
    # the path has no extension, which keeps the concatenation below safe.
    ext = File.extname(target.path.to_s)
    content = Net::HTTP.get(target)
    name = Digest::MD5.hexdigest(content) + ext
    # Binary mode: the payload is arbitrary bytes (typically an image).
    File.open(Config[:scrape][:basepath] + name, 'wb') do |file|
      file.write(content)
    end
    name
  end

  # One record per URL seen in a channel message.
  # NOTE(review): DataMapper String properties have a default maximum length;
  # long URLs may not fit in :original_url -- confirm against the schema.
  class Link
    include DataMapper::Resource
    property :id, Serial
    # Stored file name for downloaded content, or an MD5 digest of the URL.
    property :url, String
    # The URL exactly as it appeared in the message.
    property :original_url, String
    property :created_at, DateTime
    # Nick of the user who posted the URL.
    property :nick, String
  end
end
# Bring the database schema in line with the models defined above.
DataMapper.auto_upgrade!

# PRIVMSG trigger: for every http(s) URL in the message text, record a
# Scrape::Link and collect the page's <title>; the collected titles are then
# sent back to the message target, prefixed with Config[:scrape][:title_prefix].
# A word matching /notitle/ stops processing and suppresses the reply
# (links recorded before that word are kept).
Client.register_trigger("PRIVMSG") do |msg|
  text = msg[:params][1]
  # `next` rather than `return`: `return` inside a plain block can raise
  # LocalJumpError when the block is invoked later as a proc.
  next 0 if text.nil?

  titles = []
  suppressed = false

  text.split.each do |word|
    if word =~ /notitle/
      suppressed = true
      break
    end
    next unless word =~ %r{\Ahttps?://}

    begin
      # 4cdn content is mirrored to disk; everything else is keyed by the
      # MD5 digest of the URL itself.
      if word =~ /4cdn[.]org/
        url = Scrape.download(word)
      else
        url = Digest::MD5.hexdigest(word)
      end
      # Net::HTTP.get needs a URI object when called with a single argument.
      title = Nokogiri::HTML(Net::HTTP.get(URI(word))).title
    rescue StandardError => e
      # URLs come from arbitrary chat users: a malformed or unreachable one
      # must not abort handling of the rest of the message.
      warn "scrape: skipping #{word}: #{e.class}: #{e.message}"
      next
    end

    # title is nil when the document has no <title> (e.g. an image).
    title = title.gsub(/\s+/, " ").strip unless title.nil?

    Scrape::Link.create(
      :url => url,
      :original_url => word,
      :created_at => Time.now,
      :nick => msg[:prefix].split('!').first
    )

    titles << title if title && !title.empty? && title !~ /Let me google that/
  end

  next 0 if suppressed

  Client.privmsg(msg[:params][0], Config[:scrape][:title_prefix] + titles.join(' ')) unless titles.empty?
end