summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Robert Gerus <arachnist@i.am-a.cat> 2015-12-05 04:55:54 +0100
committer Robert Gerus <arachnist@i.am-a.cat> 2015-12-05 04:55:54 +0100
commit 24965d675653e5e98dc694cc2ac66940162df670 (patch)
tree 98e5c9955b0780e88021923617a4e941fa802bec
parent 8906d2ec86df5006c2558dde333e1721abdcb9dd (diff)
download gorepost-24965d675653e5e98dc694cc2ac66940162df670.tar.gz
gorepost-24965d675653e5e98dc694cc2ac66940162df670.tar.bz2
gorepost-24965d675653e5e98dc694cc2ac66940162df670.tar.xz
gorepost-24965d675653e5e98dc694cc2ac66940162df670.zip
Attempt to recode iso8859-2 title strings.
Fixes #25
-rw-r--r-- bot/urltitle.go 14
1 files changed, 13 insertions, 1 deletions
diff --git a/bot/urltitle.go b/bot/urltitle.go
index 4699148..7785704 100644
--- a/bot/urltitle.go
+++ b/bot/urltitle.go
@@ -6,9 +6,12 @@ package bot
import (
"fmt"
+ "golang.org/x/text/encoding/charmap"
+ "golang.org/x/text/transform"
"log"
"regexp"
"strings"
+ "unicode/utf8"
cfg "github.com/arachnist/gorepost/config"
"github.com/arachnist/gorepost/irc"
@@ -16,6 +19,7 @@ import (
var trimTitle *regexp.Regexp
var trimLink *regexp.Regexp
+var enc = charmap.ISO8859_2
func getURLTitle(l string) string {
title, err := httpGetXpath(l, "//head/title")
@@ -25,7 +29,15 @@ func getURLTitle(l string) string {
return fmt.Sprint("error:", err)
}
- return string(trimTitle.ReplaceAll([]byte(title), []byte{' '})[:])
+ title = string(trimTitle.ReplaceAll([]byte(title), []byte{' '})[:])
+ if !utf8.ValidString(title) {
+ title, _, err = transform.String(enc.NewDecoder(), title)
+ if err != nil {
+ return fmt.Sprint("error:", err)
+ }
+ }
+
+ return title
}
func linktitle(output func(irc.Message), msg irc.Message) {