Generalize http and xpath extraction functions.

configurable-file-paths
Robert Gerus 2015-11-19 15:13:45 +01:00
parent 6dc9e68c18
commit 57cd8ac5e7
2 changed files with 78 additions and 44 deletions

74
bot/helpers.go Normal file
View File

@ -0,0 +1,74 @@
// Copyright 2015 Robert S. Gerus. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package bot
import (
"errors"
"io"
"net/http"
"time"
"github.com/moovweb/gokogiri"
"github.com/moovweb/gokogiri/xpath"
)
// elementNotFound is the sentinel error returned by httpGetXpath when the
// XPath search produces no nodes. Callers compare against it by identity
// (getUrlTitle does `err == elementNotFound`) to tell "nothing matched" apart
// from transport or parse failures.
var elementNotFound = errors.New("Element not found in document")
// httpGet performs an HTTP GET on URL l and returns at most the first 5MiB of
// the response body.
//
// The TLS handshake and the wait for response headers are each limited to
// five seconds. A response that declares a zero Content-Length, or whose body
// turns out to be empty, yields an empty slice and a nil error. On failure the
// returned slice is nil and must not be used.
func httpGet(l string) ([]byte, error) {
	// 5MiB: upper bound on how much of a body is read into memory.
	const maxBody = 5 * 1024 * 1024

	tr := &http.Transport{
		TLSHandshakeTimeout:   5 * time.Second,
		ResponseHeaderTimeout: 5 * time.Second,
		// This transport is created per call and discarded afterwards, so a
		// kept-alive connection could never be reused and would only linger.
		DisableKeepAlives: true,
	}
	client := &http.Client{Transport: tr}

	resp, err := client.Get(l)
	if err != nil {
		return nil, err
	}
	// The body has to be closed on every path, otherwise the underlying
	// connection is leaked.
	defer resp.Body.Close()

	var buf []byte
	switch {
	case resp.ContentLength == 0:
		return []byte{}, nil
	case resp.ContentLength < 0 || resp.ContentLength > maxBody:
		// Unknown or oversized body: read up to the cap and drop the rest.
		buf = make([]byte, maxBody)
	default:
		buf = make([]byte, resp.ContentLength)
	}

	n, err := io.ReadFull(resp.Body, buf)
	switch err {
	case nil:
		return buf, nil
	case io.EOF, io.ErrUnexpectedEOF:
		// The body ended before the buffer was full. io.EOF means it was
		// completely empty, which is a valid (empty) result, not a failure.
		return buf[:n], nil
	default:
		return nil, err
	}
}
// httpGetXpath downloads the document at URL l with httpGet, parses it as
// HTML and returns the inner HTML of the first node matching the XPath
// expression x.
//
// It returns elementNotFound when the search yields no nodes or the parsed
// document has no root element to search from. Download, parse and search
// errors are returned unchanged, and an expression that cannot be compiled is
// reported as an error as well.
func httpGetXpath(l, x string) (string, error) {
	buf, err := httpGet(l)
	if err != nil {
		return "", err
	}

	doc, err := gokogiri.ParseHtml(buf)
	// Release the document whenever the parser handed one back (also on the
	// error path), but never call Free through a nil pointer.
	if doc != nil {
		defer doc.Free()
	}
	if err != nil {
		return "", err
	}

	// Named expr so the imported xpath package is not shadowed.
	expr := xpath.Compile(x)
	if expr == nil {
		// NOTE(review): Compile appears to signal failure only via a nil
		// result; confirm against the gokogiri xpath package.
		return "", errors.New("invalid xpath expression: " + x)
	}

	// httpGet can legitimately return an empty body; guard against a document
	// without a root element instead of dereferencing it blindly.
	root := doc.Root()
	if root == nil {
		return "", elementNotFound
	}

	sr, err := root.Search(expr)
	if err != nil {
		return "", err
	}
	if len(sr) == 0 {
		return "", elementNotFound
	}
	return sr[0].InnerHtml(), nil
}

View File

@ -7,15 +7,9 @@ package bot
import (
"bytes"
"fmt"
"io"
"log"
"net/http"
"regexp"
"strings"
"time"
"github.com/moovweb/gokogiri"
"github.com/moovweb/gokogiri/xpath"
cfg "github.com/arachnist/gorepost/config"
"github.com/arachnist/gorepost/irc"
@ -24,48 +18,14 @@ import (
// trimTitle is the pattern whose matches getUrlTitle replaces with a single
// space before returning a page title. It is declared without an initializer,
// so it is nil until assigned elsewhere in the package.
var trimTitle *regexp.Regexp
func getUrlTitle(l string) string {
var buf []byte
tr := &http.Transport{
TLSHandshakeTimeout: 5 * time.Second,
ResponseHeaderTimeout: 5 * time.Second,
}
client := &http.Client{Transport: tr}
resp, err := client.Get(l)
if err != nil {
return fmt.Sprintf("error:", err)
}
// 5MiB
if resp.ContentLength > 5*1024*1024 || resp.ContentLength < 0 {
buf = make([]byte, 5*1024*1024)
} else if resp.ContentLength == 0 {
return "empty"
} else {
buf = make([]byte, resp.ContentLength)
}
i, err := io.ReadFull(resp.Body, buf)
if err == io.ErrUnexpectedEOF {
buf = buf[:i]
title, err := httpGetXpath(l, "//head/title")
if err == elementNotFound {
return "no title"
} else if err != nil {
return fmt.Sprintf("error:", err)
}
doc, err := gokogiri.ParseHtml(buf)
defer doc.Free()
if err != nil {
return fmt.Sprintf("error:", err)
}
xpath := xpath.Compile("//head/title")
sr, err := doc.Root().Search(xpath)
if len(sr) > 0 {
return string(trimTitle.ReplaceAll([]byte(sr[0].InnerHtml()), []byte{' '})[:])
} else {
return "no title"
}
return string(trimTitle.ReplaceAll([]byte(title), []byte{' '})[:])
}
func linktitle(output chan irc.Message, msg irc.Message) {