forked from hswaw/hscloud
devtools/ci/remote-cache: init
This is a first pass at a Bazel remote cache. It notably does not yet do any authentication, upload limits or garbage collection. We won't be deploying it to prod until these are done. Change-Id: I70a89dbe8b3ec933b2ce82e234a969e8337ba1d9
This commit is contained in:
parent
87801be432
commit
5de0b32e3b
4 changed files with 269 additions and 0 deletions
23
devtools/ci/remote-cache/BUILD.bazel
Normal file
23
devtools/ci/remote-cache/BUILD.bazel
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Build rules for the Bazel remote cache service
# (//devtools/ci/remote-cache).
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")

go_library(
    name = "go_default_library",
    srcs = [
        "main.go",
        "service.go",
    ],
    importpath = "code.hackerspace.pl/hscloud/devtools/ci/remote-cache",
    visibility = ["//visibility:private"],
    deps = [
        # Common service scaffolding (gRPC/debug listeners).
        "//go/mirko:go_default_library",
        "@com_github_golang_glog//:go_default_library",
        # S3-compatible object store client used to talk to Ceph RadosGW.
        "@com_github_minio_minio_go_v7//:go_default_library",
        "@com_github_minio_minio_go_v7//pkg/credentials:go_default_library",
    ],
)

# The runnable cache server: bazel run //devtools/ci/remote-cache.
go_binary(
    name = "remote-cache",
    embed = [":go_default_library"],
    visibility = ["//visibility:public"],
)
|
34
devtools/ci/remote-cache/README.md
Normal file
34
devtools/ci/remote-cache/README.md
Normal file
|
@ -0,0 +1,34 @@
|
|||
remote-cache
|
||||
============
|
||||
|
||||
A small Go service that acts as a [Bazel remote cache HTTP server](https://docs.bazel.build/versions/master/remote-caching.html#http-caching-protocol) and is backed by Ceph.
|
||||
|
||||
Status
|
||||
------
|
||||
|
||||
Work in progress, does not run on prod yet, needs write authentication support first.
|
||||
|
||||
Building
|
||||
--------
|
||||
|
||||
bazel build //devtools/ci/remote-cache
|
||||
|
||||
Running locally
|
||||
---------------
|
||||
|
||||
For now, you'll have to manually acquire some Ceph RadosGW/S3 keys. When you have them:
|
||||
|
||||
bazel run //devtools/ci/remote-cache -- \
|
||||
-object_access_key YOURACCESSKEY -object_secret_key yourSecretAccessKey -object_bucket your-bucket
|
||||
|
||||
Then, tell Bazel to connect when building something:
|
||||
|
||||
bazel build --remote_cache=http://127.0.0.1:8080 //cluster/prodvider
|
||||
|
||||
You should see something like this, if you ended up mostly doing GETs:
|
||||
|
||||
INFO: Elapsed time: 40.149s, Critical Path: 30.40s
|
||||
INFO: 705 processes: 705 remote cache hit.
|
||||
INFO: Build completed successfully, 718 total actions
|
||||
|
||||
This will be slower than building without cache if you mostly PUT cache elements, and will likely even be slower on GETs unless you have excellent connectivity to k0. The remote cache is only a building block used to make builds faster, and we will need more things (eg. Remote Build Execution and CI) to actually get speedups for developer builds.
|
77
devtools/ci/remote-cache/main.go
Normal file
77
devtools/ci/remote-cache/main.go
Normal file
|
@ -0,0 +1,77 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"net"
|
||||
"net/http"
|
||||
|
||||
"code.hackerspace.pl/hscloud/go/mirko"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/minio/minio-go/v7"
|
||||
"github.com/minio/minio-go/v7/pkg/credentials"
|
||||
)
|
||||
|
||||
// Flag default values. Each is overridden by the corresponding flag
// registered in main().
var (
	// Address on which the public Bazel HTTP caching protocol endpoint listens.
	flagListenPublic = ":8080"
	// Hostname of the S3-compatible (Ceph RadosGW) Object Storage endpoint.
	flagObjectEndpoint = "object.ceph-waw3.hswaw.net"
	// Object Storage credentials; no usable defaults, must be provided via flags.
	flagObjectAccessKey = ""
	flagObjectSecretKey = ""
	// Bucket in which cache entries are stored; required.
	flagObjectBucket = ""
	// Key prefix prepended to every cache object path within the bucket.
	flagObjectPrefix = "cache/"
)
|
||||
|
||||
func main() {
|
||||
flag.StringVar(&flagListenPublic, "listen_public", flagListenPublic, "Address to listen on for Bazel HTTP caching protocol clients")
|
||||
flag.StringVar(&flagObjectEndpoint, "object_endpoint", flagObjectEndpoint, "Object Storage endpoint name")
|
||||
flag.StringVar(&flagObjectAccessKey, "object_access_key", flagObjectEndpoint, "Object Storage AccessKey")
|
||||
flag.StringVar(&flagObjectSecretKey, "object_secret_key", flagObjectEndpoint, "Object Storage SecretKey")
|
||||
flag.StringVar(&flagObjectBucket, "object_bucket", flagObjectBucket, "Object Storage bucket name")
|
||||
flag.StringVar(&flagObjectPrefix, "object_prefix", flagObjectPrefix, "Object Storage prefix for paths")
|
||||
flag.Parse()
|
||||
|
||||
if flagObjectBucket == "" {
|
||||
glog.Exitf("object_bucket must be set")
|
||||
}
|
||||
|
||||
m := mirko.New()
|
||||
if err := m.Listen(); err != nil {
|
||||
glog.Exitf("Listen(): %v", err)
|
||||
}
|
||||
|
||||
minioClient, err := minio.New(flagObjectEndpoint, &minio.Options{
|
||||
Creds: credentials.NewStaticV4(flagObjectAccessKey, flagObjectSecretKey, ""),
|
||||
Secure: true,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
glog.Exitf("Failed to initialize Object Storage client: %v", err)
|
||||
}
|
||||
|
||||
s := newService(minioClient, flagObjectBucket, flagObjectPrefix)
|
||||
|
||||
httpListen, err := net.Listen("tcp", flagListenPublic)
|
||||
if err != nil {
|
||||
glog.Exitf("net.Listen: %v", err)
|
||||
}
|
||||
httpServer := &http.Server{
|
||||
Addr: flagListenPublic,
|
||||
Handler: s.publicHandler,
|
||||
}
|
||||
|
||||
errs := make(chan error, 0)
|
||||
go func() {
|
||||
glog.Infof("Public listening on %s", flagListenPublic)
|
||||
errs <- httpServer.Serve(httpListen)
|
||||
}()
|
||||
|
||||
if err := m.Serve(); err != nil {
|
||||
glog.Exitf("Serve(): %v", err)
|
||||
}
|
||||
|
||||
select {
|
||||
case <-m.Done():
|
||||
case err := <-errs:
|
||||
glog.Exitf("Serve(): %v", err)
|
||||
}
|
||||
}
|
135
devtools/ci/remote-cache/service.go
Normal file
135
devtools/ci/remote-cache/service.go
Normal file
|
@ -0,0 +1,135 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/minio/minio-go/v7"
|
||||
)
|
||||
|
||||
// service implements the Bazel remote cache HTTP protocol on top of an
// S3-compatible object store.
type service struct {
	// objectClient talks to the S3/RadosGW backend.
	objectClient *minio.Client
	// objectBucket is the bucket holding all cache entries.
	objectBucket string
	// objectPrefix is prepended to every object key (eg. "cache/").
	objectPrefix string
	// publicHandler serves the (currently unauthenticated) Bazel HTTP
	// caching protocol; wired up in newService.
	publicHandler http.Handler
}
|
||||
|
||||
func newService(objectClient *minio.Client, objectBucket, objectPrefix string) *service {
|
||||
s := &service{
|
||||
objectClient: objectClient,
|
||||
objectBucket: objectBucket,
|
||||
objectPrefix: objectPrefix,
|
||||
}
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/", s.handlePublic)
|
||||
s.publicHandler = mux
|
||||
return s
|
||||
}
|
||||
|
||||
func (s *service) handlePublic(w http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
switch r.Method {
|
||||
case "GET":
|
||||
// Always allow GET access to cache.
|
||||
case "PUT":
|
||||
// Require authentication for cache writes.
|
||||
// TODO(q3k): implement
|
||||
default:
|
||||
http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
|
||||
return
|
||||
}
|
||||
|
||||
parts := strings.Split(strings.TrimPrefix(r.URL.Path, "/"), "/")
|
||||
if len(parts) != 2 {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
switch parts[0] {
|
||||
case "ac":
|
||||
case "cas":
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
if len(parts[1]) != 64 {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
}
|
||||
|
||||
cacheKey := fmt.Sprintf("%s%s/%s", s.objectPrefix, parts[0], parts[1])
|
||||
glog.Infof("%s %s %s", r.RemoteAddr, r.Method, cacheKey)
|
||||
|
||||
if r.Method == "GET" {
|
||||
obj, err := s.objectClient.GetObject(ctx, s.objectBucket, cacheKey, minio.GetObjectOptions{})
|
||||
if err != nil {
|
||||
glog.Errorf("GetObject(%s, %s): %v", s.objectBucket, cacheKey, err)
|
||||
http.Error(w, "could not contact object store", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
_, err = obj.Stat()
|
||||
// Minio-go doesn't seem to let us do this in any nicer way :/
|
||||
if err != nil && err.Error() == "The specified key does not exist." {
|
||||
http.NotFound(w, r)
|
||||
return
|
||||
} else if err != nil {
|
||||
glog.Errorf("Stat(%s, %s): %v", s.objectBucket, cacheKey, err)
|
||||
http.Error(w, "could not contact object store", http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
// Stream object to client.
|
||||
io.Copy(w, obj)
|
||||
}
|
||||
if r.Method == "PUT" {
|
||||
// Buffer the file, as we need to check its sha256.
|
||||
// TODO(q3k): check and limit body size.
|
||||
data, err := ioutil.ReadAll(r.Body)
|
||||
if err != nil {
|
||||
glog.Errorf("ReadAll: %v", err)
|
||||
return
|
||||
}
|
||||
hashBytes := sha256.Sum256(data)
|
||||
hash := hex.EncodeToString(hashBytes[:])
|
||||
// Bazel cache uploads always seem to use lowercase sha256
|
||||
// representations.
|
||||
if parts[0] == "cas" && hash != parts[1] {
|
||||
glog.Warningf("%s: sent PUT for %s with invalid hash %s", r.RemoteAddr, cacheKey, hash)
|
||||
// Don't tell the user anything - Bazel won't care, anyway, and us
|
||||
// logging this is probably good enough for debugging purposes.
|
||||
return
|
||||
}
|
||||
// If the file already exists in the cache, ignore it. S3 doesn't seem
|
||||
// to give us an upload-if-missing functionality?
|
||||
_, err = s.objectClient.StatObject(ctx, s.objectBucket, cacheKey, minio.StatObjectOptions{})
|
||||
if err == nil {
|
||||
// File already exists, return early.
|
||||
// This might not fire in case we fail to retrieve the object for
|
||||
// some reason other than its nonexistence, but an error will be
|
||||
// served for this at PutObject later on.
|
||||
return
|
||||
}
|
||||
|
||||
buffer := bytes.NewBuffer(data)
|
||||
_, err = s.objectClient.PutObject(ctx, s.objectBucket, cacheKey, buffer, int64(len(data)), minio.PutObjectOptions{
|
||||
UserMetadata: map[string]string{
|
||||
"remote-cache-origin": r.RemoteAddr,
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
// Swallow the error. Can't do much for the bazel writer, anyway.
|
||||
// Retrying here isn't easy, as we don't want to become a
|
||||
// qeueue/buffer unless really needed.
|
||||
glog.Errorf("%s: PUT %s failed: %v", r.RemoteAddr, cacheKey, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue