diff --git a/.gitignore b/.gitignore
index 11b08e2..7266cb1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@ config.rb
 *.swp
 *.swo
 *~
+Gemfile.lock
diff --git a/Gemfile b/Gemfile
index af38b76..9fc75e3 100644
--- a/Gemfile
+++ b/Gemfile
@@ -6,3 +6,4 @@ gem "logger"
 gem "nokogiri"
 gem "datamapper"
 gem "dm-postgres-adapter"
+gem "dm-sqlite-adapter"
diff --git a/TODO b/TODO
new file mode 100644
index 0000000..76c37a6
--- /dev/null
+++ b/TODO
@@ -0,0 +1,8 @@
+ * Ratelimit outgoing messages
+ * Prettier config class, not just a Hash
+ * .split on [:params][1] in scrape, to make it work with multiple uris on one line
+   * somewhat done - prints title only for the first uri, to avoid flooding the channel
+ * Per channel/network configuration
+ * Multiple network support
+ * content downloading in scrape in a separate thread to not make the whole client timeout on downloading a big/slow url
+ * switch from open-uri and net/www to CURB (https://github.com/taf2/curb)
diff --git a/plugins/commands/cycki.rb b/plugins/commands/cycki.rb
index e173365..afb07a0 100644
--- a/plugins/commands/cycki.rb
+++ b/plugins/commands/cycki.rb
@@ -2,5 +2,5 @@ require 'net/http'
 require 'nokogiri'
 
 Client.register_command("cycki") { |args|
-  Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text
+  Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text.gsub("_preview", "")
 }
diff --git a/plugins/scrape.rb b/plugins/scrape.rb
index 0f9e82a..d30e0fc 100644
--- a/plugins/scrape.rb
+++ b/plugins/scrape.rb
@@ -1,17 +1,60 @@
 require 'open-uri'
 require 'digest/md5'
+require 'data_mapper'
+require 'nokogiri'
 
 include EventMachine::IRC::Commands
 
+DataMapper.setup(:default, Config[:scrape][:db])
+
+module Scrape
+  def self.download(uri)
+    ext = "." + uri.sub(/.*[.]([^.]*)/, '\1') if uri.match(/[^.]+[.][^.]+/)
+    content = open(uri).read
+    md5 = Digest::MD5.hexdigest(content)
+    File.open(Config[:scrape][:basepath] + md5 + ext, 'w') do |file|
+      file.write(content)
+    end
+
+    return md5 + ext
+  end
+
+  class Link
+    include DataMapper::Resource
+
+    property :id, Serial
+    property :url, String
+    property :original_url, String
+    property :created_at, DateTime
+    property :nick, String
+  end
+end
+
+DataMapper.auto_upgrade!
+
 Client.register_trigger("PRIVMSG") do |msg|
-  msg[:params].each do |word|
-    if word =~ /4cdn[.]org/ then
-      ext = "." + word.sub(/.*[.]([^.]*)/, '\1') if word.match(/[^.]+[.][^.]+/)
-      content = open(word).read
-      md5 = Digest::MD5.hexdigest(content)
-      File.open(Config[:scrape][:basepath] + md5 + ext, 'w') do |file|
-        file.write(content)
+  return 0 if msg[:params][1].nil?
+  msg[:params][1].split.each do |word|
+    return 0 if word =~ /notitle/
+    if word =~ /^https?:\/\// then
+      original_url = word
+      if word =~ /4cdn[.]org/ then
+        url = Scrape::download(word)
+      else
+        url = Digest::MD5.hexdigest(word)
       end
+
+      title = Nokogiri::HTML(open(word).read(10240)).title().split(/\n/)[0]
+
+      Scrape::Link.create(
+        :url => url,
+        :original_url => original_url,
+        :created_at => Time.now,
+        :nick => msg[:prefix].split('!').first
+      )
+
+      Client.privmsg(msg[:params][0], Config[:scrape][:title_prefix] + title) if not title.nil?
+      return 0
     end
   end
 end
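
Note: plugins/scrape.rb now reads three keys under Config[:scrape] that are not part of this diff, since config.rb is gitignored. A minimal sketch of what config.rb might provide, assuming a sqlite DataMapper URI to match the dm-sqlite-adapter added to the Gemfile; the key names come from the code above, the values are only placeholders:

    # config.rb (hypothetical example, not part of this patch; adjust values to taste)
    Config = {
      :scrape => {
        :db           => "sqlite://#{Dir.pwd}/scrape.db", # or a postgres:// URI via dm-postgres-adapter
        :basepath     => "/var/www/scrape/",              # where Scrape.download writes fetched files
        :title_prefix => "Title: "                        # prepended before the scraped <title> text
      },
      :commands => {
        :cycki => {
          :message => "cycki: ",
          :url     => "http://example.com/"               # placeholder; cycki.rb reads //img/@src from it
        }
      }
    }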
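
Note: the new Scrape::Link model only records URLs; nothing in this patch reads them back yet. Stock DataMapper finders would let a future command query the table, for example (hypothetical usage, not in the diff):

    # Show the five most recently scraped links; :order/:limit are standard dm-core options.
    Scrape::Link.all(:order => [:created_at.desc], :limit => 5).each do |link|
      puts "#{link.created_at} <#{link.nick}> #{link.original_url}"
    end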
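
Note: the TODO item about downloading in a separate thread could later be handled with EM.defer, since the client already runs inside the EventMachine reactor (it includes EventMachine::IRC::Commands). A rough sketch of how the blocking title fetch inside the PRIVMSG trigger could be handed off; word and msg are the trigger's own locals, and the wiring is only an assumption, not part of this patch:

    # Run the slow open-uri fetch on EM's thread pool, then announce the title
    # back on the reactor thread so a big/slow URL no longer stalls the client.
    fetch_title = proc do
      Nokogiri::HTML(open(word).read(10240)).title.to_s.split(/\n/).first
    end
    announce = proc do |title|
      Client.privmsg(msg[:params][0], Config[:scrape][:title_prefix] + title) unless title.nil? || title.empty?
    end
    EM.defer(fetch_title, announce)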