package main

import (
	"context"
	"encoding/xml"
	"fmt"
	"html/template"
	"net/http"
	"sort"
	"time"

	"github.com/golang/glog"
)

// This implements 'Atom' feed parsing. Honestly, this was written without
// looking at any spec. If it ever breaks, you know why.

var (
	// feedsURLs is a map from an atom feed name to its URL. All the following
	// feeds will be combined and rendered on the main page of the website.
	feedsURLs = map[string]string{
		"blog": "https://blog.hackerspace.pl/feed/atom/",
	}
)

// atomFeed is a retrieved atom feed.
type atomFeed struct {
	XMLName xml.Name     `xml:"feed"`
	Entries []*atomEntry `xml:"entry"`
}

// atomEntry is an entry (eg. blog post) from an atom feed. It contains fields
// directly from the XML, plus some additional parsed types and metadata.
type atomEntry struct {
	XMLName      xml.Name      `xml:"entry"`
	Author       string        `xml:"author>name"`
	Title        template.HTML `xml:"title"`
	Summary      template.HTML `xml:"summary"`
	UpdatedRaw   string        `xml:"updated"`
	PublishedRaw string        `xml:"published"`
	Link         struct {
		Href string `xml:"href,attr"`
	} `xml:"link"`

	// Updated is the updated time parsed from UpdatedRaw.
	Updated time.Time
	// UpdatedHuman is a human-friendly representation of Updated for web
	// rendering.
	UpdatedHuman string
	// Published is the published time parsed from PublishedRaw.
	Published time.Time
	// Source is the name of the feed that this entry was retrieved from. Only
	// set after combining multiple feeds together (ie. when returned from
	// getFeeds).
	Source string
}
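
// For illustration (an assumption, not taken from the upstream feed): a
// minimal Atom document of the shape the structs above decode. All values
// here are made up. The `author>name` tag descends into <author><name>, and
// `href,attr` reads the href attribute of <link>.
//
//	<feed xmlns="http://www.w3.org/2005/Atom">
//	  <entry>
//	    <author><name>Example Author</name></author>
//	    <title>Example post</title>
//	    <summary>A short summary.</summary>
//	    <updated>2021-01-02T15:04:05Z</updated>
//	    <published>2021-01-01T15:04:05Z</published>
//	    <link href="https://blog.example.com/example-post"/>
//	  </entry>
//	</feed>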

// getAtomFeed retrieves a single Atom feed from the given URL.
func getAtomFeed(ctx context.Context, url string) (*atomFeed, error) {
	r, err := http.NewRequestWithContext(ctx, "GET", url, nil)
	if err != nil {
		return nil, fmt.Errorf("NewRequest(%q): %w", url, err)
	}
	res, err := http.DefaultClient.Do(r)
	if err != nil {
		return nil, fmt.Errorf("Do(%q): %w", url, err)
	}
	defer res.Body.Close()
	// Refuse to decode error pages; without this, a 404 or 500 response
	// would surface as a confusing XML decode error.
	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("Do(%q): unexpected status %s", url, res.Status)
	}

	var feed atomFeed
	d := xml.NewDecoder(res.Body)
	if err := d.Decode(&feed); err != nil {
		return nil, fmt.Errorf("Decode: %w", err)
	}

	for i, e := range feed.Entries {
		updated, err := time.Parse(time.RFC3339, e.UpdatedRaw)
		if err != nil {
			return nil, fmt.Errorf("entry %d: cannot parse updated date %q: %w", i, e.UpdatedRaw, err)
		}
		published, err := time.Parse(time.RFC3339, e.PublishedRaw)
		if err != nil {
			return nil, fmt.Errorf("entry %d: cannot parse published date %q: %w", i, e.PublishedRaw, err)
		}
		e.Updated = updated
		e.Published = published
		e.UpdatedHuman = e.Updated.Format("02-01-2006")
		if e.Author == "" {
			e.Author = "Anonymous"
		}
	}

	return &feed, nil
}
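
// A usage sketch (hypothetical, not part of the original file): fetching one
// feed with a timeout, so a slow server cannot block the caller forever.
//
//	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
//	defer cancel()
//	feed, err := getAtomFeed(ctx, feedsURLs["blog"])
//	if err != nil {
//		glog.Exitf("getAtomFeed: %v", err)
//	}
//	fmt.Printf("got %d entries\n", len(feed.Entries))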

// feedWorker runs a worker which retrieves all atom feeds every minute and
// updates the service's feeds map with the retrieved data. On error, the
// feeds are not updated (whatever is already cached in the map will continue
// to be available) and the error is logged.
func (s *service) feedWorker(ctx context.Context) {
	okay := false
	get := func() {
		feeds := make(map[string]*atomFeed)

		prev := okay
		okay = true
		for name, url := range feedsURLs {
			feed, err := getAtomFeed(ctx, url)
			if err != nil {
				glog.Errorf("Getting feed %q (%s) failed: %v", name, url, err)
				okay = false
				// Keep whatever was previously cached for this feed, per the
				// contract described above; otherwise a single failed fetch
				// would wipe the entry from the new map.
				s.feedsMu.RLock()
				if cached, ok := s.feeds[name]; ok {
					feeds[name] = cached
				}
				s.feedsMu.RUnlock()
				continue
			}
			feeds[name] = feed
		}

		// Log whenever the first fetch succeeds, or whenever the fetch
		// succeeds again after failing (avoiding polluting logs with success
		// messages).
		if !prev && okay {
			glog.Infof("Feeds okay.")
		}

		// Update cached feeds.
		s.feedsMu.Lock()
		s.feeds = feeds
		s.feedsMu.Unlock()
	}
	// Perform initial fetch.
	get()

	// ... and update every minute.
	t := time.NewTicker(time.Minute)
	defer t.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			get()
		}
	}
}
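
// A launch sketch (hypothetical): feedWorker blocks, so it is presumably
// started on its own goroutine at service startup and stopped by cancelling
// the context.
//
//	ctx, cancel := context.WithCancel(context.Background())
//	defer cancel()
//	go s.feedWorker(ctx)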

// getFeeds retrieves the currently cached feeds and combines them into a
// single reverse-chronological timeline, annotating each entry's Source field
// with the name of the feed from where it was retrieved.
func (s *service) getFeeds() []*atomEntry {
	s.feedsMu.RLock()
	feeds := s.feeds
	s.feedsMu.RUnlock()

	var res []*atomEntry
	for n, feed := range feeds {
		for _, entry := range feed.Entries {
			// Copy the entry so that annotating Source does not mutate the
			// cached feed.
			e := *entry
			e.Source = n
			res = append(res, &e)
		}
	}
	// Newest first: entry i sorts before entry j if j was published before i.
	sort.Slice(res, func(i, j int) bool {
		return res[j].Published.Before(res[i].Published)
	})
	return res
}
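
// A rendering sketch (hypothetical, not from this file): the combined
// timeline is presumably fed to an html/template for the main page. Note that
// Title and Summary are template.HTML, so the feed's own HTML is rendered
// unescaped.
//
//	var feedTmpl = template.Must(template.New("feeds").Parse(
//		`{{range .}}<article>
//		<a href="{{.Link.Href}}">{{.Title}}</a> by {{.Author}}, {{.UpdatedHuman}}
//		<p>{{.Summary}}</p>
//	</article>{{end}}`))
//
//	func (s *service) handleFeeds(w http.ResponseWriter, r *http.Request) {
//		if err := feedTmpl.Execute(w, s.getFeeds()); err != nil {
//			glog.Errorf("Execute: %v", err)
//		}
//	}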