From b476ced1a332f4263680bb15fb6eabb64e5238bc Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Sun, 9 Mar 2014 22:23:45 +0100 Subject: [PATCH 01/23] Ignore Gemfile.lock --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 11b08e2..7266cb1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ config.rb *.swp *.swo *~ +Gemfile.lock From abff24f33fc86b79249c5d5207ae51a54d384469 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Mon, 10 Mar 2014 08:40:39 +0100 Subject: [PATCH 02/23] Possibly, it'll work now. --- plugins/scrape.rb | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 0f9e82a..f2e7da8 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -3,15 +3,49 @@ require 'digest/md5' include EventMachine::IRC::Commands +DataMapper.setup(:default, Config[:scrape][:db]) + +module Scrape + def download(uri) + ext = "." + word.sub(/.*[.]([^.]*)/, '\1') if word.match(/[^.]+[.][^.]+/) + content = open(word).read + md5 = Digest::MD5.hexdigest(content) + File.open(Config[:scrape][:basepath] + md5 + ext, 'w') do |file| + file.write(content) + end + + return md5 + ext + end + + class Link + include DataMapper::Resource + + property :id, Serial + property :url, String + property :original_url, String + property :created_at, DateTime + property :nick, String + end +end + +DataMapper.auto_upgrade! + Client.register_trigger("PRIVMSG") do |msg| msg[:params].each do |word| - if word =~ /4cdn[.]org/ then - ext = "." + word.sub(/.*[.]([^.]*)/, '\1') if word.match(/[^.]+[.][^.]+/) - content = open(word).read - md5 = Digest::MD5.hexdigest(content) - File.open(Config[:scrape][:basepath] + md5 + ext, 'w') do |file| - file.write(content) + if word =~ /^https?:\/\// then + original_url = word + if word =~ /4cdn[.]org/ then + url = download(word) + else + url = Digest::MD5.hexdigest(word) end + + Scrape::Link.create( + :url => url, + :original_url => original_url, + :created_at => Time.now, + :nick => msg[:prefix].split('!').first + ) end end end From 78fd894291fcc7b179621891cab1d41bfabd6a0e Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Mon, 10 Mar 2014 09:00:51 +0100 Subject: [PATCH 03/23] whoops, forgot about Gemfile and require line --- Gemfile | 1 + plugins/scrape.rb | 1 + 2 files changed, 2 insertions(+) diff --git a/Gemfile b/Gemfile index af38b76..9fc75e3 100644 --- a/Gemfile +++ b/Gemfile @@ -6,3 +6,4 @@ gem "logger" gem "nokogiri" gem "datamapper" gem "dm-postgres-adapter" +gem "dm-sqlite-adapter" diff --git a/plugins/scrape.rb b/plugins/scrape.rb index f2e7da8..0eb407d 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -1,5 +1,6 @@ require 'open-uri' require 'digest/md5' +require 'datamapper' include EventMachine::IRC::Commands From 7e8c46c947bdedd91be80ec40d961a084cb69956 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Mon, 10 Mar 2014 09:01:37 +0100 Subject: [PATCH 04/23] typo --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 0eb407d..1850492 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -1,6 +1,6 @@ require 'open-uri' require 'digest/md5' -require 'datamapper' +require 'data_mapper' include EventMachine::IRC::Commands From d9c1b8c9c66f8ccb4010e605a8e7ea74b872bfd6 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Mon, 10 Mar 2014 09:06:17 +0100 Subject: [PATCH 05/23] brainfart. --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 1850492..0479e9c 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -36,7 +36,7 @@ Client.register_trigger("PRIVMSG") do |msg| if word =~ /^https?:\/\// then original_url = word if word =~ /4cdn[.]org/ then - url = download(word) + url = Scrape::download(word) else url = Digest::MD5.hexdigest(word) end From 180b5332de8bde762829471cb3d0ff23274db498 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Mon, 10 Mar 2014 09:11:56 +0100 Subject: [PATCH 06/23] brainfart #2 --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 0479e9c..9fd8d8c 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -7,7 +7,7 @@ include EventMachine::IRC::Commands DataMapper.setup(:default, Config[:scrape][:db]) module Scrape - def download(uri) + def self.download(uri) ext = "." + word.sub(/.*[.]([^.]*)/, '\1') if word.match(/[^.]+[.][^.]+/) content = open(word).read md5 = Digest::MD5.hexdigest(content) From cc354191307da34e35cafa7279dd276bdc1fe427 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Mon, 10 Mar 2014 09:12:56 +0100 Subject: [PATCH 07/23] brainfart #3 --- plugins/scrape.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 9fd8d8c..416df5c 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -8,8 +8,8 @@ DataMapper.setup(:default, Config[:scrape][:db]) module Scrape def self.download(uri) - ext = "." + word.sub(/.*[.]([^.]*)/, '\1') if word.match(/[^.]+[.][^.]+/) - content = open(word).read + ext = "." + uri.sub(/.*[.]([^.]*)/, '\1') if uri.match(/[^.]+[.][^.]+/) + content = open(uri).read md5 = Digest::MD5.hexdigest(content) File.open(Config[:scrape][:basepath] + md5 + ext, 'w') do |file| file.write(content) From d6001189b5adfd6e04c3fefeb11b5b0a2c9eb3df Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Wed, 12 Mar 2014 19:18:45 +0100 Subject: [PATCH 08/23] remove _preview from urls. --- plugins/commands/cycki.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/commands/cycki.rb b/plugins/commands/cycki.rb index e173365..08782a4 100644 --- a/plugins/commands/cycki.rb +++ b/plugins/commands/cycki.rb @@ -2,5 +2,5 @@ require 'net/http' require 'nokogiri' Client.register_command("cycki") { |args| - Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text + Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text.slice! "_preview" } From 0dd44bafb2319cd325ace5515a7a051333b05d87 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Wed, 12 Mar 2014 19:20:29 +0100 Subject: [PATCH 09/23] whoops --- plugins/commands/cycki.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/commands/cycki.rb b/plugins/commands/cycki.rb index 08782a4..523c51d 100644 --- a/plugins/commands/cycki.rb +++ b/plugins/commands/cycki.rb @@ -2,5 +2,5 @@ require 'net/http' require 'nokogiri' Client.register_command("cycki") { |args| - Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text.slice! "_preview" + Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text.slice!("_preview") } From 21c691d1bde87cfe6c6a5b610f49ddd27213ac15 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Wed, 12 Mar 2014 19:21:30 +0100 Subject: [PATCH 10/23] whoops --- plugins/commands/cycki.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/commands/cycki.rb b/plugins/commands/cycki.rb index 523c51d..bdd9bdb 100644 --- a/plugins/commands/cycki.rb +++ b/plugins/commands/cycki.rb @@ -2,5 +2,5 @@ require 'net/http' require 'nokogiri' Client.register_command("cycki") { |args| - Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text.slice!("_preview") + Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text.slice("_preview") } From 2a71a3c772daa37e0b31cf4fd75bdc7a5b5c10ce Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Wed, 12 Mar 2014 19:24:10 +0100 Subject: [PATCH 11/23] whoops #3 --- plugins/commands/cycki.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/commands/cycki.rb b/plugins/commands/cycki.rb index bdd9bdb..afb07a0 100644 --- a/plugins/commands/cycki.rb +++ b/plugins/commands/cycki.rb @@ -2,5 +2,5 @@ require 'net/http' require 'nokogiri' Client.register_command("cycki") { |args| - Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text.slice("_preview") + Config[:commands][:cycki][:message] + Nokogiri::HTML(Net::HTTP.get(URI(Config[:commands][:cycki][:url]))).xpath('//img/@src').inner_text.gsub("_preview", "") } From 0d1477c80b170d9c39f4006265b6735ba20447c1 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 14:45:00 +0100 Subject: [PATCH 12/23] print url titles. --- plugins/scrape.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 416df5c..388d3fe 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -1,6 +1,7 @@ require 'open-uri' require 'digest/md5' require 'data_mapper' +require 'nokogiri' include EventMachine::IRC::Commands @@ -41,12 +42,16 @@ Client.register_trigger("PRIVMSG") do |msg| url = Digest::MD5.hexdigest(word) end + title = Nokogiri::HTML(open(word).read).title() + Scrape::Link.create( :url => url, :original_url => original_url, :created_at => Time.now, :nick => msg[:prefix].split('!').first ) + + Client.privmsg(msg[:params][0], title) if not title.nil? end end end From 0afbf7cb39685970dda2ff47804efb5d1cc7c7c8 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 14:52:57 +0100 Subject: [PATCH 13/23] url title prefix. --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 388d3fe..bed7b0c 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -51,7 +51,7 @@ Client.register_trigger("PRIVMSG") do |msg| :nick => msg[:prefix].split('!').first ) - Client.privmsg(msg[:params][0], title) if not title.nil? + Client.privmsg(msg[:params][0], Config[:scrape][:title_prefix] + title) if not title.nil? end end end From ae151059610cf9a9c0e365bebc22b084c054349c Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 15:03:13 +0100 Subject: [PATCH 14/23] get only the first line of title. --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index bed7b0c..fca01cb 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -42,7 +42,7 @@ Client.register_trigger("PRIVMSG") do |msg| url = Digest::MD5.hexdigest(word) end - title = Nokogiri::HTML(open(word).read).title() + title = Nokogiri::HTML(open(word).read).title().split(/\n/)[0] Scrape::Link.create( :url => url, From 28277cb42e84583e07ff327ca1e250d61494a5d5 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 15:34:19 +0100 Subject: [PATCH 15/23] limit url size. --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index fca01cb..fb6992e 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -42,7 +42,7 @@ Client.register_trigger("PRIVMSG") do |msg| url = Digest::MD5.hexdigest(word) end - title = Nokogiri::HTML(open(word).read).title().split(/\n/)[0] + title = Nokogiri::HTML(open(word).read(10240).title().split(/\n/)[0][ Scrape::Link.create( :url => url, From 6851590e6baafc62a0017166f5eadf4750921c4e Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 15:35:07 +0100 Subject: [PATCH 16/23] typo. --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index fb6992e..5191bf5 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -42,7 +42,7 @@ Client.register_trigger("PRIVMSG") do |msg| url = Digest::MD5.hexdigest(word) end - title = Nokogiri::HTML(open(word).read(10240).title().split(/\n/)[0][ + title = Nokogiri::HTML(open(word).read(10240)).title().split(/\n/)[0][ Scrape::Link.create( :url => url, From 7a41abdcfaaa2a6e07e6474a2934d92682819be8 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 15:36:23 +0100 Subject: [PATCH 17/23] typo. --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 5191bf5..2891137 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -42,7 +42,7 @@ Client.register_trigger("PRIVMSG") do |msg| url = Digest::MD5.hexdigest(word) end - title = Nokogiri::HTML(open(word).read(10240)).title().split(/\n/)[0][ + title = Nokogiri::HTML(open(word).read(10240)).title().split(/\n/)[0] Scrape::Link.create( :url => url, From d831d8f3ad3bbab9b1d301989483ac505661e4cd Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 15:52:13 +0100 Subject: [PATCH 18/23] add TODO. --- TODO | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 TODO diff --git a/TODO b/TODO new file mode 100644 index 0000000..f05e2da --- /dev/null +++ b/TODO @@ -0,0 +1,6 @@ + * Ratelimit outgoing messages + * Prettier config class, not just a Hash + * .split on [:params][1] in scrape, to make it work with multiple uris on one line + * Per channel/network configuration + * Multiple network support + * content downloading in scrape in a separate thread to not make the whole client timeout on downloading a big/slow url From 487d433a5c3a36e5589eae30e2143dc4efd59e89 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 16:19:54 +0100 Subject: [PATCH 19/23] todo update. --- TODO | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO b/TODO index f05e2da..abd9b5f 100644 --- a/TODO +++ b/TODO @@ -4,3 +4,4 @@ * Per channel/network configuration * Multiple network support * content downloading in scrape in a separate thread to not make the whole client timeout on downloading a big/slow url + * switch from open-uri and net/www to CURB (https://github.com/taf2/curb) From 795947c01d1ebe16e71acdd5962e9cf6dbd3a4a7 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 16:37:43 +0100 Subject: [PATCH 20/23] do the split thing. --- plugins/scrape.rb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 2891137..5e69003 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -33,7 +33,8 @@ end DataMapper.auto_upgrade! Client.register_trigger("PRIVMSG") do |msg| - msg[:params].each do |word| + return 0 if msg[:params][1].nil? + msg[:params][1].split.each do |word| if word =~ /^https?:\/\// then original_url = word if word =~ /4cdn[.]org/ then @@ -52,6 +53,7 @@ Client.register_trigger("PRIVMSG") do |msg| ) Client.privmsg(msg[:params][0], Config[:scrape][:title_prefix] + title) if not title.nil? + return 0 end end end From b7906c84e4cc6a92b58f63fe61a1dbffe296a66f Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 16:42:47 +0100 Subject: [PATCH 21/23] foobar. --- TODO | 1 + 1 file changed, 1 insertion(+) diff --git a/TODO b/TODO index abd9b5f..76c37a6 100644 --- a/TODO +++ b/TODO @@ -1,6 +1,7 @@ * Ratelimit outgoing messages * Prettier config class, not just a Hash * .split on [:params][1] in scrape, to make it work with multiple uris on one line + * somewhat done - prints title only for the first uri, to avoid flooding the channel * Per channel/network configuration * Multiple network support * content downloading in scrape in a separate thread to not make the whole client timeout on downloading a big/slow url From e11331bc7a89ef463c8c00b7c1afdcfb357d33b2 Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 17:07:15 +0100 Subject: [PATCH 22/23] Add a notitle option. --- plugins/scrape.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 5e69003..6ca694a 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -35,6 +35,7 @@ DataMapper.auto_upgrade! Client.register_trigger("PRIVMSG") do |msg| return 0 if msg[:params][1].nil? msg[:params][1].split.each do |word| + return 0 if word =~ "notitle" if word =~ /^https?:\/\// then original_url = word if word =~ /4cdn[.]org/ then From 005eda4ae53f89847385d322c4eb7028b899a9bb Mon Sep 17 00:00:00 2001 From: Robert Gerus Date: Thu, 13 Mar 2014 17:29:24 +0100 Subject: [PATCH 23/23] typo. --- plugins/scrape.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/scrape.rb b/plugins/scrape.rb index 6ca694a..d30e0fc 100644 --- a/plugins/scrape.rb +++ b/plugins/scrape.rb @@ -35,7 +35,7 @@ DataMapper.auto_upgrade! Client.register_trigger("PRIVMSG") do |msg| return 0 if msg[:params][1].nil? msg[:params][1].split.each do |word| - return 0 if word =~ "notitle" + return 0 if word =~ /notitle/ if word =~ /^https?:\/\// then original_url = word if word =~ /4cdn[.]org/ then