app/matrix: media repo proxy init

This implements media-repo-proxy, a lil' bit of Go to make our
infrastructure work with matrix-media-repo's concept of Host headers.

For some reason, MMR really wants Host: hackerspace.pl instead of Host:
matrix.hackerspace.pl. We'd fix that in their code, but with no tests
and with complex config reload logic it looks very daunting. We'd just
fix that in our Ingress, but that's not easy (no per-rule host
overrides).

So, we commit a tiny little itty bitty war crime and implement a piece
of Go code that serves as a rewriter for this.

This works, tested on boston:

    $ curl -H "Host: matrix.hackerspace.pl" 10.10.12.46:8080/_matrix/media/r0/download/hackerspace.pl/EwVBulPgCWDWNGMKjcOKGGbk | file -
    /dev/stdin: JPEG image data, JFIF standard 1.01, aspect ratio, density 1x1, segment length 16, baseline, precision 8, 650x300, components 3

(this address is media-repo.matrix.svc.k0.hswaw.net)

But hey, at least it has tests.

Change-Id: Ib6af1988fe8e112c9f3a5577506b18b48d80af62
Reviewed-on: https://gerrit.hackerspace.pl/c/hscloud/+/1143
Reviewed-by: q3k <q3k@hackerspace.pl>
This commit is contained in:
q3k 2021-09-16 11:28:00 +02:00 committed by informatic
parent 8b9c8f9a03
commit ebe6075556
5 changed files with 277 additions and 1 deletions

View file

@ -91,7 +91,56 @@ local kube = import "../../../kube/kube.libsonnet";
},
},
svc: app.ns.Contain(kube.Service("media-repo")) {
// Run //app/matrix/media-repo-proxy, if needed. This rewrites Host headers
// from the homeserver's serving Host to the MXID hostname (which
// matrix-media-repo expects).
//
// Currently we are only able to run one proxy for one homeserver config -
// but we don't expect to have multiple homeservers per matrix-media-repo
// any time soon.
local needProxying = [
h
for h in cfg.homeservers
if "https://%s" % [h.name] != h.csApi
],
proxies: if std.length(needProxying) > 1 then error "can only proxy one homeserver" else
if std.length(needProxying) == 1 then {
local homeserver = needProxying[0],
local upstreamHost = homeserver.name,
local prefix = "https://",
local downstreamHost = std.substr(homeserver.csApi, std.length(prefix), std.length(homeserver.csApi)-std.length(prefix)),
deployment: app.ns.Contain(kube.Deployment("media-repo-proxy")) {
spec+: {
template+: {
spec+: {
containers_: {
default: kube.Container("default") {
image: "registry.k0.hswaw.net/q3k/media-repo-proxy:1631791816-18609443fffde38a055f504e80f95e44f49d2481",
command: [
"/app/matrix/media-repo-proxy",
"-downstream_host", downstreamHost,
"-upstream_host", upstreamHost,
"-upstream", app.internalSvc.host_colon_port,
"-listen", ":8080",
],
ports_: {
http: { containerPort: 8080 },
},
},
},
},
},
},
},
} else {},
internalSvc: app.ns.Contain(kube.Service("media-repo-internal")) {
target_pod:: app.deployment.spec.template,
},
svc: if std.length(needProxying) > 0 then app.ns.Contain(kube.Service("media-repo")) {
target_pod:: app.proxies.deployment.spec.template,
} else app.internalSvc,
}

View file

@ -0,0 +1,47 @@
load("@io_bazel_rules_docker//container:container.bzl", "container_image", "container_layer", "container_push")
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test")
# Library target holding the proxy's Go sources; only the binary and the test
# below embed it, hence the private visibility.
go_library(
    name = "media-repo-proxy_lib",
    srcs = ["main.go"],
    importpath = "code.hackerspace.pl/hscloud/app/matrix/media-repo-proxy",
    visibility = ["//visibility:private"],
)

# The runnable proxy binary, built from the library above.
go_binary(
    name = "media-repo-proxy",
    embed = [":media-repo-proxy_lib"],
    visibility = ["//visibility:public"],
)

# Unit tests, compiled into the same package as the library.
go_test(
    name = "media-repo-proxy_test",
    srcs = ["main_test.go"],
    embed = [":media-repo-proxy_lib"],
)
# Image layer that places the proxy binary under /app/matrix/.
container_layer(
    name = "layer_bin",
    files = [":media-repo-proxy"],
    directory = "/app/matrix/",
)

# Runtime image: the base image plus the binary layer.
container_image(
    name = "runtime",
    base = "@prodimage-bionic//image",
    layers = [":layer_bin"],
)

# Pushes the runtime image to registry.k0.hswaw.net as q3k/media-repo-proxy.
# NOTE(review): {STABLE_GIT_COMMIT} is presumably filled in by Bazel workspace
# status stamping - confirm against the workspace configuration.
container_push(
    name = "push",
    image = ":runtime",
    format = "Docker",
    registry = "registry.k0.hswaw.net",
    repository = "q3k/media-repo-proxy",
    tag = "1631791816-{STABLE_GIT_COMMIT}",
)

View file

@ -0,0 +1,18 @@
# Matrix-Media-Repository Proxy
This is A Saucerful Of Go that sits between [Matrix Media Repo](https://github.com/turt2live/matrix-media-repo) instances and Ingresses.
It has one job: rewrite Host headers. The reason for this is that matrix-media-repo wants Host: hackerspace.pl (MXID domain) while our traffic comes in with Host: matrix.hackerspace.pl (actual Host at which we serve Matrix).
## Alternatives considered
1. Rewriting this in Nginx Ingress Controller: not easy to do on a per-rule basis, would require some extra ingresses and dumb loopbacks.
2. Fixing matrix-media-repo: not easy with the assumptions their code makes and with no tests that we can run.
## Running
Locally:
$ bazel run //app/matrix/media-repo-proxy -- -downstream_host=matrix.hackerspace.pl -upstream_host=hackerspace.pl -upstream=foo.bar.svc.cluster.local:8080
In prod, should be part of jsonnet infra and be brought up as needed.

View file

@ -0,0 +1,79 @@
package main
import (
"flag"
"fmt"
"log"
"net"
"net/http"
"net/http/httputil"
)
// Runtime configuration. All of these are populated from command-line flags
// in main() (and set directly by the tests).
var (
	// flagListen is the address the proxy listens on for downstream traffic.
	flagListen string
	// flagDownstreamHost is the Host value that client requests must carry
	// to be forwarded.
	flagDownstreamHost string
	// flagUpstream is the host:port used to reach the upstream server.
	flagUpstream string
	// flagUpstreamHost is the Host value presented to the upstream server.
	flagUpstreamHost string
)
// newProxy builds the HTTP handler served by media-repo-proxy.
//
// The handler accepts only requests whose Host (ignoring any port) equals
// flagDownstreamHost; everything else is answered with 400 "invalid host".
// Accepted requests are forwarded over plain HTTP to flagUpstream with both
// Host and X-Forwarded-Host rewritten to flagUpstreamHost.
func newProxy() http.Handler {
	// rewrite retargets an accepted request at the upstream server.
	rewrite := func(out *http.Request) {
		out.URL.Scheme = "http"
		out.URL.Host = flagUpstream
		out.Host = flagUpstreamHost
		// MMR reads this field and prioritizes it over the Host header.
		out.Header.Set("X-Forwarded-Host", flagUpstreamHost)
	}
	forwarder := &httputil.ReverseProxy{Director: rewrite}

	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// Prefer the source address carried in the Hscloud-Nic-* headers
		// over the TCP peer address, but only when both IP and port are set.
		client := r.RemoteAddr
		srcIP := r.Header.Get("Hscloud-Nic-Source-IP")
		srcPort := r.Header.Get("Hscloud-Nic-Source-Port")
		if srcIP != "" && srcPort != "" {
			client = net.JoinHostPort(srcIP, srcPort)
			r.Header.Set("X-Forwarded-For", client)
		}
		log.Printf("%s %s %s", client, r.Method, r.URL.Path)

		// During federation requests the Host is foo.example.com:443, so
		// drop the port when there is one. If splitting fails (usually
		// because there is no port, or the value is malformed) fall back to
		// the raw Host; it is compared against the required value right
		// below either way.
		requested := r.Host
		if hostOnly, _, err := net.SplitHostPort(r.Host); err == nil {
			requested = hostOnly
		}

		if requested == flagDownstreamHost {
			forwarder.ServeHTTP(w, r)
			return
		}

		log.Printf("Invalid host requested %q, wanted %q", r.Host, flagDownstreamHost)
		w.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(w, "invalid host\n")
	})
}
// main parses the command-line flags, builds the proxy handler and serves it
// on flagListen until the HTTP server fails.
func main() {
	flag.StringVar(&flagUpstreamHost, "upstream_host", "hackerspace.pl", "Upstream Host header, as sent to upstream")
	flag.StringVar(&flagUpstream, "upstream", "foo.bar.svc.cluster.local:8080", "Address and port to reach upstream")
	flag.StringVar(&flagDownstreamHost, "downstream_host", "matrix.hackerspace.pl", "Downstream Host header, as requested by client traffic")
	flag.StringVar(&flagListen, "listen", ":8080", "Address to listen at for downstream traffic")
	flag.Parse()

	log.Printf("Starting media-repo-proxy")
	proxy := newProxy()

	log.Printf("Listening on %s...", flagListen)
	// ListenAndServe only ever returns with a non-nil error. Exit with a
	// non-zero status so that whatever supervises the process sees the
	// failure instead of a seemingly clean shutdown.
	if err := http.ListenAndServe(flagListen, proxy); err != nil {
		log.Fatalf("Listen failed: %v", err)
	}
}

View file

@ -0,0 +1,83 @@
package main
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"net/url"
"testing"
)
// TestForward runs the proxy in front of a test backend and checks that
// requests for the configured downstream host are forwarded with rewritten
// Host / X-Forwarded-For values, while requests for any other host are
// rejected with 400.
func TestForward(t *testing.T) {
	// Test backend which proudly proclaims the Host and the value of the
	// X-Forwarded-For header it received.
	backendServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintf(w, "hello %s %s\n", r.Host, r.Header.Get("X-Forwarded-For"))
	}))
	defer backendServer.Close()

	rpURL, err := url.Parse(backendServer.URL)
	if err != nil {
		t.Fatalf("parsing test backend URL failed: %v", err)
	}

	// The proxy is configured through package-level flag variables. Restore
	// them afterwards so other tests in the package see their old values.
	oldUpstream, oldUpstreamHost, oldDownstreamHost := flagUpstream, flagUpstreamHost, flagDownstreamHost
	defer func() {
		flagUpstream, flagUpstreamHost, flagDownstreamHost = oldUpstream, oldUpstreamHost, oldDownstreamHost
	}()

	// Configure and run proxy.
	flagUpstream = rpURL.Host
	flagUpstreamHost = "example.com"
	flagDownstreamHost = "matrix.example.com"
	proxy := httptest.NewServer(newProxy())
	defer proxy.Close()

	// Run through a few tests.
	for i, te := range []struct {
		headers map[string]string
		host    string
		status  int
		want    string
	}{
		{
			// 0: expected to succeed
			headers: map[string]string{
				"Hscloud-Nic-Source-IP":   "1.2.3.4",
				"Hscloud-Nic-Source-Port": "1337",
			},
			host:   "matrix.example.com",
			status: http.StatusOK,
			want:   "hello example.com 1.2.3.4:1337, 127.0.0.1\n",
		},
		{
			// 1: expected to succeed
			host:   "matrix.example.com",
			status: http.StatusOK,
			want:   "hello example.com 127.0.0.1\n",
		},
		{
			// 2: expected to succeed
			host:   "matrix.example.com:443",
			status: http.StatusOK,
			want:   "hello example.com 127.0.0.1\n",
		},
		{
			// 3: expected to fail
			host:   "example.com",
			status: http.StatusBadRequest,
			want:   "invalid host\n",
		},
	} {
		req, err := http.NewRequest("GET", proxy.URL, nil)
		if err != nil {
			t.Fatalf("%d: building request failed: %v", i, err)
		}
		req.Host = te.host
		for k, v := range te.headers {
			req.Header.Set(k, v)
		}

		resp, err := http.DefaultClient.Do(req)
		if err != nil {
			t.Fatalf("%d: Get failed: %v", i, err)
		}
		// Close the body before looking at the read error so it is released
		// even when the read fails.
		b, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			t.Fatalf("%d: Read failed: %v", i, err)
		}

		if want, got := te.status, resp.StatusCode; want != got {
			t.Errorf("%d: wrong status code, wanted %d, got %d", i, want, got)
		}
		if want, got := te.want, string(b); want != got {
			t.Errorf("%d: wrong response from upstream, wanted %q, got %q", i, want, got)
		}
	}
}