summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRobert S. Gerus <ar@bash.org.pl>2015-12-23 09:52:06 +0100
committerRobert S. Gerus <ar@bash.org.pl>2015-12-23 09:52:06 +0100
commita2147fce2361f3869b883c1e2b317795a51b2d01 (patch)
tree258fc2416a6eeef6bf6c1ecacab14a307978a7ac
parent29f6f6aed127d2b1558ce21000725d826efd3c65 (diff)
downloadgorepost-a2147fce2361f3869b883c1e2b317795a51b2d01.tar.gz
gorepost-a2147fce2361f3869b883c1e2b317795a51b2d01.tar.bz2
gorepost-a2147fce2361f3869b883c1e2b317795a51b2d01.tar.xz
gorepost-a2147fce2361f3869b883c1e2b317795a51b2d01.zip
Scrape and rehost 4chan urls.
That's totally a good idea, right?
-rw-r--r--bot/urltitle.go48
1 files changed, 48 insertions, 0 deletions
diff --git a/bot/urltitle.go b/bot/urltitle.go
index b19620b..755fa17 100644
--- a/bot/urltitle.go
+++ b/bot/urltitle.go
@@ -5,10 +5,16 @@
package bot
import (
+ "crypto/sha1"
"encoding/json"
"fmt"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/transform"
+ "io"
+ "io/ioutil"
+ "net/http"
+ "os"
+ "path"
"regexp"
"strings"
"unicode/utf8"
@@ -53,6 +59,44 @@ func youtubeShort(l string) string {
return youtube(string(res))
}
+func fourchanscrape(l string) string {
+ h := sha1.New()
+ t, e := ioutil.TempFile("", "4scrape_")
+ ext := path.Ext(l)
+ if e != nil {
+ return "error creating temp file"
+ }
+ multiwriter := io.MultiWriter(h, t)
+
+ response, err := http.Get(l)
+ if err != nil {
+ return "error while downloading url"
+ }
+ defer response.Body.Close()
+
+ if response.StatusCode != 200 {
+ return "no title"
+ }
+
+ _, err = io.Copy(multiwriter, response.Body)
+ if err != nil {
+ return "error while reading response"
+ }
+
+ old := t.Name()
+ t.Close()
+
+ filename := fmt.Sprintf("%x%s", h.Sum(nil), ext)
+ dest := path.Join(cfg.LookupString(nil, "FourChanDir"), filename)
+
+ err = os.Rename(old, dest)
+ if err != nil {
+ return "error while renaming tempfile"
+ }
+
+ return path.Join(cfg.LookupString(nil, "FourChanLinkBase"), filename)
+}
+
func genericURLTitle(l string) string {
title, err := httpGetXpath(l, "//head/title")
if err == errElementNotFound {
@@ -85,6 +129,10 @@ var customDataFetchers = []struct {
fetcher: youtubeShort,
},
{
+ re: regexp.MustCompile("//i[.]4cdn[.]org/"),
+ fetcher: fourchanscrape,
+ },
+ {
re: regexp.MustCompile(".*"),
fetcher: genericURLTitle,
},