From ebe6075556a0da3e6d515cd1f2ab5c75c05c9969 Mon Sep 17 00:00:00 2001 From: Serge Bazanski Date: Thu, 16 Sep 2021 11:28:00 +0200 Subject: [PATCH] app/matrix: media repo proxy init This implements media-repo-proxy, a lil' bit of Go to make our infrastructure work with matrix-media-repo's concept of Host headers. For some reason, MMR really wants Host: hackerspace.pl instead of Host: matrix.hackerspace.pl. We'd fix that in their code, but with no tests and with complex config reload logic it looks very daunting. We'd just fix that in our Ingress, but that's not easy (no per-rule host overrides). So, we commit a tiny little itty bitty war crime and implement a piece of Go code that serves as a rewriter for this. This works, tested on boston: $ curl -H "Host: matrix.hackerspace.pl" 10.10.12.46:8080/_matrix/media/r0/download/hackerspace.pl/EwVBulPgCWDWNGMKjcOKGGbk | file - /dev/stdin: JPEG image data, JFIF standard 1.01, aspect ratio, density 1x1, segment length 16, baseline, precision 8, 650x300, components 3 (this address is media-repo.matrix.svc.k0.hswaw.net) But hey, at least it has tests. 
Change-Id: Ib6af1988fe8e112c9f3a5577506b18b48d80af62 Reviewed-on: https://gerrit.hackerspace.pl/c/hscloud/+/1143 Reviewed-by: q3k --- app/matrix/lib/media-repo.libsonnet | 51 ++++++++++++++- app/matrix/media-repo-proxy/BUILD.bazel | 47 ++++++++++++++ app/matrix/media-repo-proxy/README.md | 18 +++++ app/matrix/media-repo-proxy/main.go | 79 ++++++++++++++++++++++ app/matrix/media-repo-proxy/main_test.go | 83 ++++++++++++++++++++++++ 5 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 app/matrix/media-repo-proxy/BUILD.bazel create mode 100644 app/matrix/media-repo-proxy/README.md create mode 100644 app/matrix/media-repo-proxy/main.go create mode 100644 app/matrix/media-repo-proxy/main_test.go diff --git a/app/matrix/lib/media-repo.libsonnet b/app/matrix/lib/media-repo.libsonnet index 338dc781..90af77b7 100644 --- a/app/matrix/lib/media-repo.libsonnet +++ b/app/matrix/lib/media-repo.libsonnet @@ -91,7 +91,56 @@ local kube = import "../../../kube/kube.libsonnet"; }, }, - svc: app.ns.Contain(kube.Service("media-repo")) { + // Run //app/matrix/media-repo-proxy, if needed. This rewrites Host headers + // from the homeserver's serving Host to the MXID hostname (which + // matrix-media-repo expects). + // + // Currently we only are able to run one proxy for one homeserver config - + // but we don't expect to have multiple homeservers per matrix-media-repo + // any time soon. 
+ local needProxying = [ + h + for h in cfg.homeservers + if "https://%s" % [h.name] != h.csApi + ], + proxies: if std.length(needProxying) > 1 then error "can only proxy one homeserver" else + if std.length(needProxying) == 1 then { + local homeserver = needProxying[0], + + local upstreamHost = homeserver.name, + local prefix = "https://", + local downstreamHost = std.substr(homeserver.csApi, std.length(prefix), std.length(homeserver.csApi)-std.length(prefix)), + + deployment: app.ns.Contain(kube.Deployment("media-repo-proxy")) { + spec+: { + template+: { + spec+: { + containers_: { + default: kube.Container("default") { + image: "registry.k0.hswaw.net/q3k/media-repo-proxy:1631791816-18609443fffde38a055f504e80f95e44f49d2481", + command: [ + "/app/matrix/media-repo-proxy", + "-downstream_host", downstreamHost, + "-upstream_host", upstreamHost, + "-upstream", app.internalSvc.host_colon_port, + "-listen", ":8080", + ], + ports_: { + http: { containerPort: 8080 }, + }, + }, + }, + }, + }, + }, + }, + } else {}, + + internalSvc: app.ns.Contain(kube.Service("media-repo-internal")) { target_pod:: app.deployment.spec.template, }, + + svc: if std.length(needProxying) > 0 then app.ns.Contain(kube.Service("media-repo")) { + target_pod:: app.proxies.deployment.spec.template, + } else app.internalSvc, } diff --git a/app/matrix/media-repo-proxy/BUILD.bazel b/app/matrix/media-repo-proxy/BUILD.bazel new file mode 100644 index 00000000..a56d8814 --- /dev/null +++ b/app/matrix/media-repo-proxy/BUILD.bazel @@ -0,0 +1,47 @@ +load("@io_bazel_rules_docker//container:container.bzl", "container_image", "container_layer", "container_push") +load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library", "go_test") + +go_library( + name = "media-repo-proxy_lib", + srcs = ["main.go"], + importpath = "code.hackerspace.pl/hscloud/app/matrix/media-repo-proxy", + visibility = ["//visibility:private"], +) + +go_binary( + name = "media-repo-proxy", + embed = [":media-repo-proxy_lib"], + 
visibility = ["//visibility:public"], +) + +go_test( + name = "media-repo-proxy_test", + srcs = ["main_test.go"], + embed = [":media-repo-proxy_lib"], +) + +container_layer( + name = "layer_bin", + files = [ + ":media-repo-proxy", + ], + directory = "/app/matrix/", +) + +container_image( + name = "runtime", + base = "@prodimage-bionic//image", + layers = [ + ":layer_bin", + ], +) + +container_push( + name = "push", + image = ":runtime", + format = "Docker", + registry = "registry.k0.hswaw.net", + repository = "q3k/media-repo-proxy", + tag = "1631791816-{STABLE_GIT_COMMIT}", +) + diff --git a/app/matrix/media-repo-proxy/README.md b/app/matrix/media-repo-proxy/README.md new file mode 100644 index 00000000..c9df4adc --- /dev/null +++ b/app/matrix/media-repo-proxy/README.md @@ -0,0 +1,18 @@ +# Matrix-Media-Repository Proxy + +This is A Saurceful Of Go that sits between [Matrix Media Repo](https://github.com/turt2live/matrix-media-repo) instances and Ingresses. + +It has one job: rewrite Host headers. The reason for this is that matrix-media-repo wants Host: hackerspace.pl (MXID domain) while our traffic comes in with Host: matrix.hackerspace.pl (actual Host at which we serve Matrix). + +## Alternatives considered + +1. Rewriting this in Nginx Ingress Controller: not easy to do on a per-rule basis, would require some extra ingresses and dumb loopbacks. +2. Fixing matrix-media-repo: not easy with the assumptions their code makes and with no tests that we can run. + +## Running + +Locally: + + $ bazel run //app/matrix/media-repo-proxy -- -downstream_host=matrix.hackerspace.pl -upstream_host=hackerspace.pl -upstream=foo.bar.svc.cluster.local:8080 + +In prod, should be part of jsonnet infra and be brought up as needed. 
diff --git a/app/matrix/media-repo-proxy/main.go b/app/matrix/media-repo-proxy/main.go new file mode 100644 index 00000000..920e89e5 --- /dev/null +++ b/app/matrix/media-repo-proxy/main.go @@ -0,0 +1,79 @@ +package main + +import ( + "flag" + "fmt" + "log" + "net" + "net/http" + "net/http/httputil" +) + +var ( + flagUpstream string + flagUpstreamHost string + flagDownstreamHost string + flagListen string +) + +func newProxy() http.Handler { + proxy := httputil.ReverseProxy{ + Director: func(r *http.Request) { + r.URL.Scheme = "http" + r.URL.Host = flagUpstream + r.Host = flagUpstreamHost + // MMR reads this field and prioritizes it over the Host header. + r.Header.Set("X-Forwarded-Host", flagUpstreamHost) + }, + } + + acl := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + remote := r.RemoteAddr + sip := r.Header.Get("Hscloud-Nic-Source-IP") + sport := r.Header.Get("Hscloud-Nic-Source-Port") + if sip != "" && sport != "" { + remote = net.JoinHostPort(sip, sport) + r.Header.Set("X-Forwarded-For", remote) + } + log.Printf("%s %s %s", remote, r.Method, r.URL.Path) + + // ... during federation requests, Host is foo.example.com:443, strip + // that out if that's the case. Ignore port number, we don't care about + // it. + host, _, err := net.SplitHostPort(r.Host) + if err != nil { + // Error can mean many things, but generally it means 'no port', or + // a very malformed host. 
Regardless, just default to the raw + // value, we explicitly check it against a required host value + // further down + host = r.Host + } + + if host != flagDownstreamHost { + log.Printf("Invalid host requested %q, wanted %q", r.Host, flagDownstreamHost) + w.WriteHeader(http.StatusBadRequest) + fmt.Fprintf(w, "invalid host\n") + return + } + proxy.ServeHTTP(w, r) + }) + + return acl +} + +func main() { + flag.StringVar(&flagUpstreamHost, "upstream_host", "hackerspace.pl", "Upstream Host header, as sent to upstream") + flag.StringVar(&flagUpstream, "upstream", "foo.bar.svc.cluster.local:8080", "Address and port to reach upstream") + flag.StringVar(&flagDownstreamHost, "downstream_host", "matrix.hackerspace.pl", "Downstream Host header, as requested by client traffic") + flag.StringVar(&flagListen, "listen", ":8080", "Address to listen at for downstream traffic") + flag.Parse() + + log.Printf("Starting media-repo-proxy") + + proxy := newProxy() + + log.Printf("Listening on %s...", flagListen) + if err := http.ListenAndServe(flagListen, proxy); err != nil { + log.Printf("Listen failed: %v", err) + } +} diff --git a/app/matrix/media-repo-proxy/main_test.go b/app/matrix/media-repo-proxy/main_test.go new file mode 100644 index 00000000..ba4d4c36 --- /dev/null +++ b/app/matrix/media-repo-proxy/main_test.go @@ -0,0 +1,83 @@ +package main + +import ( + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/url" + "testing" +) + +func TestForward(t *testing.T) { + // Test backend which proudly proclaims the value of the X-Forwarded-For header it received. + backendServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "hello %s %s\n", r.Host, r.Header.Get("X-Forwarded-For")) + })) + defer backendServer.Close() + rpURL, err := url.Parse(backendServer.URL) + if err != nil { + t.Fatalf("parsing test backend URL failed: %v", err) + } + + // Configure and run proxy. 
+ flagUpstream = rpURL.Host + flagUpstreamHost = "example.com" + flagDownstreamHost = "matrix.example.com" + proxy := httptest.NewServer(newProxy()) + defer proxy.Close() + + // Run through a few tests. + for i, te := range []struct { + headers map[string]string + host string + want string + }{ + { + // 0: expected to succeed + headers: map[string]string{ + "Hscloud-Nic-Source-IP": "1.2.3.4", + "Hscloud-Nic-Source-Port": "1337", + }, + host: "matrix.example.com", + want: "hello example.com 1.2.3.4:1337, 127.0.0.1\n", + }, + { + // 1: expected to succeed + host: "matrix.example.com", + want: "hello example.com 127.0.0.1\n", + }, + { + // 2: expected to succeed + host: "matrix.example.com:443", + want: "hello example.com 127.0.0.1\n", + }, + { + // 3: expected to fail + host: "example.com", + want: "invalid host\n", + }, + } { + req, _ := http.NewRequest("GET", proxy.URL, nil) + req.Host = te.host + for k, v := range te.headers { + req.Header.Set(k, v) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("Get failed: %v", err) + } + + b, err := io.ReadAll(resp.Body) + if err != nil { + t.Fatalf("Read failed: %v", err) + } + resp.Body.Close() + + if want, got := te.want, string(b); want != got { + t.Errorf("%d: wrong response from upstream, wanted %q, got %q", i, want, got) + } + } + +}