cluster/clustercfg: rewrite it in Go

This replaces the old clustercfg script with a brand spanking new,
mostly-equivalent Go reimplementation. It's not exactly the same, though;
here are the differences:

 1. No cluster deployment logic anymore - we expect everyone to use ops/
    machine at this point.
 2. All certs/keys are Ed25519 and do not expire by default - but
    support for short-lived certificates is there, and is actually more
    generic and reusable. Currently it's only used for admincreds.
 3. Speaking of admincreds: the new admincreds automatically figure out
    your username.
 4. admincreds also doesn't shell out to kubectl anymore, and doesn't
    override your default context. The generated creds can live
    peacefully alongside your normal prodaccess creds.
 5. gencerts (the new nodestrap without deployment support) now
    automatically generates certs for all nodes, based on local Nix
    modules in ops/.
 6. No secretstore support. This will be changed once we rebuild
    secretstore in Go. For now users are expected to manually run
    secretstore sync on cluster/secrets.

Change-Id: Ida935f44e04fd933df125905eee10121ac078495
Reviewed-on: https://gerrit.hackerspace.pl/c/hscloud/+/1498
Reviewed-by: q3k <q3k@hackerspace.pl>
changes/98/1498/4
q3k 2023-03-31 22:36:54 +00:00 committed by q3k
parent a03b60b310
commit 9f0e1e88f1
13 changed files with 924 additions and 552 deletions

View File

@ -1,19 +1,25 @@
load("@pydeps//:requirements.bzl", "requirement")
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
py_binary(
name = "clustercfg",
python_version = "PY3",
go_library(
name = "go_default_library",
srcs = [
"clustercfg.py",
"ca.py",
"cmd_admincreds.go",
"cmd_gencerts.go",
"main.go",
],
visibility = ["//visibility:public"],
importpath = "code.hackerspace.pl/hscloud/cluster/clustercfg",
visibility = ["//visibility:private"],
deps = [
requirement("cffi"),
requirement("fabric"),
requirement("idna"),
requirement("six"),
"//tools:secretstore_lib",
"//tools/hscloud:python",
"//cluster/clustercfg/certs:go_default_library",
"//go/workspace:go_default_library",
"@com_github_spf13_cobra//:go_default_library",
"@io_k8s_client_go//tools/clientcmd:go_default_library",
"@io_k8s_client_go//tools/clientcmd/api:go_default_library",
],
)
go_binary(
name = "clustercfg",
embed = [":go_default_library"],
visibility = ["//visibility:public"],
)

View File

@ -1,315 +0,0 @@
# encoding: utf-8
from datetime import datetime, timezone
import json
import logging
import os
from six import StringIO
import subprocess
import tempfile
logger = logging.getLogger(__name__)
_std_subj = {
"C": "PL",
"ST": "Mazowieckie",
"L": "Warsaw",
"O": "Warsaw Hackerspace",
"OU": "clustercfg",
}
_ca_csr = {
"CN": "Prototype Test Certificate Authority",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [ _std_subj ],
}
_ca_config = {
"signing": {
"default": {
"expiry": "168h"
},
"profiles": {
"intermediate": {
"expiry": "8760h",
"usages": [
"signing",
"key encipherment",
"cert sign",
"crl sign",
"server auth",
"client auth",
],
"ca_constraint": {
"is_ca": True,
},
},
"server": {
"expiry": "8760h",
"usages": [
"signing",
"key encipherment",
"server auth"
]
},
"client": {
"expiry": "8760h",
"usages": [
"signing",
"key encipherment",
"client auth"
]
},
"client-server": {
"expiry": "8760h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
}
}
}
}
class CAException(Exception):
pass
class CA(object):
    """
    A certificate authority driven through the `cfssl` command line tool.

    The CA private key lives in the secret store under `ca-<short>.key`, the
    CA certificate lives at `<certdir>/ca-<short>.crt`. Constructing a CA
    generates both if the key is not yet present in the secret store.
    """

    def __init__(self, secretstore, certdir, short, cn):
        self.ss = secretstore
        self.cdir = certdir
        self.short = short
        self.cn = cn
        self._init_ca()

    def __str__(self):
        return 'CN={} ({})'.format(self.cn, self.short)

    @property
    def _secret_key(self):
        """Name of the CA private key within the secret store."""
        return 'ca-{}.key'.format(self.short)

    @property
    def _cert(self):
        """Filesystem path of the CA certificate."""
        return os.path.join(self.cdir, 'ca-{}.crt'.format(self.short))

    @property
    def cert_data(self):
        """Contents of the CA certificate file."""
        with open(self._cert) as f:
            return f.read()

    def _cfssl_call(self, args, obj=None, stdin=None):
        """
        Run `cfssl <args>` and return its stdout parsed as JSON.

        If obj is given it is JSON-encoded and sent on stdin (taking
        precedence over stdin); otherwise the stdin string is sent. When
        neither is given, empty input is sent.

        Raises CAException (a subclass of Exception) if cfssl exits non-zero.
        """
        if obj is not None:
            stdin = json.dumps(obj)
        if stdin is None:
            # Previously this crashed on None.encode().
            stdin = ''

        p = subprocess.Popen(['cfssl'] + args,
                             stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        outs, errs = p.communicate(stdin.encode())
        if p.returncode != 0:
            raise CAException(
                'cfssl failed. stderr: %r, stdout: %r, code: %r' % (
                    errs, outs, p.returncode))
        return json.loads(outs)

    def _init_ca(self):
        """Generate the CA key and certificate unless the key already exists."""
        if self.ss.exists(self._secret_key):
            return

        ca_csr = dict(_ca_csr)
        ca_csr['CN'] = self.cn

        logger.info("{}: Generating CA...".format(self))
        out = self._cfssl_call(['gencert', '-initca', '-'], obj=ca_csr)

        f = self.ss.open(self._secret_key, 'w')
        try:
            f.write(out['key'])
        finally:
            f.close()

        with open(self._cert, 'w') as f:
            f.write(out['cert'])

    def _request(self, hosts, o, ou):
        """Build the cfssl request shared by gen_key and gen_csr."""
        cfg = {
            "CN": hosts[0],
            "hosts": hosts,
            "key": {
                "algo": "rsa",
                "size": 4096,
            },
            "names": [
                {
                    "C": _std_subj["C"],
                    "ST": _std_subj["ST"],
                    "L": _std_subj["L"],
                    "O": o,
                    "OU": ou,
                },
            ],
        }
        cfg.update(_ca_config)
        return cfg

    def gen_key(self, hosts, o=_std_subj['O'], ou=_std_subj['OU'], save=None):
        """
        Generate a new RSA 4096 private key and a matching CSR.

        hosts[0] becomes the CN and all of hosts are requested as names. If
        save is given, the key is also written to that secret store entry.

        Returns a (key, csr) tuple as produced by `cfssl genkey`.
        """
        cfg = self._request(hosts, o, ou)
        logger.info("{}: Generating key/CSR for {}".format(self, hosts))
        out = self._cfssl_call(['genkey', '-'], obj=cfg)
        key, csr = out['key'], out['csr']
        if save is not None:
            logger.info("{}: Saving new key to secret {}".format(self, save))
            f = self.ss.open(save, 'w')
            try:
                f.write(key)
            finally:
                f.close()

        return key, csr

    def gen_csr(self, key, hosts, o=_std_subj['O'], ou=_std_subj['OU']):
        """
        Generate a CSR while already having a private key - for renewals, etc.

        key is passed to `cfssl gencsr -key`, so it is a path to the key file.

        TODO(q3k): this shouldn't be a CA method, but a cert method.
        """
        cfg = self._request(hosts, o, ou)
        logger.info("{}: Generating CSR for {}".format(self, hosts))
        out = self._cfssl_call(['gencsr', '-key', key, '-'], obj=cfg)
        return out['csr']

    def sign(self, csr, save=None, profile='client-server'):
        """
        Sign csr with this CA using the given signing profile.

        If save is given, the certificate is also written to
        `<certdir>/<save>`. Returns the certificate as returned by cfssl.
        """
        logger.info("{}: Signing CSR".format(self))
        ca = self._cert
        cakey = self.ss.plaintext(self._secret_key)

        # The context manager removes the temporary config even if cfssl fails.
        with tempfile.NamedTemporaryFile(mode='w') as config:
            json.dump(_ca_config, config)
            config.flush()
            out = self._cfssl_call(['sign', '-ca=' + ca, '-ca-key=' + cakey,
                                    '-profile='+profile, '-config='+config.name, '-'], stdin=csr)

        cert = out['cert']
        if save is not None:
            name = os.path.join(self.cdir, save)
            logger.info("{}: Saving new certificate to {}".format(self, name))
            with open(name, 'w') as f:
                f.write(cert)

        return cert

    def upload(self, c, remote_cert):
        """Copy the CA certificate to remote_cert over connection c."""
        logger.info("Uploading CA {} to {}".format(self, remote_cert))
        c.put(local=self._cert, remote=remote_cert)

    def make_cert(self, *a, **kw):
        """Create a ManagedCertificate issued by this CA."""
        return ManagedCertificate(self, *a, **kw)
class ManagedCertificate(object):
    """
    A key/certificate pair issued by a CA and kept up to date on disk.

    The key is stored in the CA's secret store as `<name>.key`, the
    certificate in the CA's certificate directory as `<name>.cert`.
    Constructing an instance calls ensure(), which generates or renews as
    needed.
    """

    def __init__(self, ca, name, hosts, o=None, ou=None, profile='client-server'):
        self.ca = ca
        self.hosts = hosts
        self.name = name
        self.key = '{}.key'.format(name)
        self.cert = '{}.cert'.format(name)
        self.o = o
        self.ou = ou
        self.profile = profile

        self.ensure()

    def __str__(self):
        return '{}'.format(self.name)

    @property
    def key_exists(self):
        return self.ca.ss.exists(self.key)

    @property
    def key_data(self):
        """Contents of the private key, read from its plaintext path."""
        # The previous code passed the file object returned by ss.open() to
        # the builtin open(), which raises TypeError.
        with open(self.key_path) as f:
            return f.read()

    @property
    def key_path(self):
        return self.ca.ss.plaintext(self.key)

    @property
    def cert_path(self):
        return os.path.join(self.ca.cdir, self.cert)

    @property
    def cert_exists(self):
        return os.path.exists(self.cert_path)

    @property
    def cert_data(self):
        with open(self.cert_path) as f:
            return f.read()

    @property
    def cert_expires_soon(self):
        """True if the certificate exists and has under 30 days left."""
        if not self.cert_exists:
            return False

        out = self.ca._cfssl_call(['certinfo', '-cert', self.cert_path], stdin="")
        not_after = datetime.strptime(out['not_after'], '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
        until = not_after - datetime.now(timezone.utc)
        return until.days < 30

    def ensure(self):
        """
        Make sure both key and certificate exist and are not about to expire.

        A missing key triggers generation of a new key and CSR; otherwise a
        CSR is generated from the existing key. Either way the CSR is signed
        and the certificate saved.
        """
        if self.key_exists and self.cert_exists and not self.cert_expires_soon:
            return

        if not self.key_exists:
            logger.info("{}: Generating key...".format(self))
            _, csr = self.ca.gen_key(self.hosts, o=self.o, ou=self.ou, save=self.key)
        else:
            logger.info("{}: Renewing certificate...".format(self))
            # Use already existing key
            csr = self.ca.gen_csr(self.key_path, self.hosts, o=self.o, ou=self.ou)

        self.ca.sign(csr, save=self.cert, profile=self.profile)

    def upload(self, c, remote_cert, remote_key, concat_ca=False):
        """
        Copy the certificate and key to the remote paths over connection c.

        With concat_ca, the CA certificate is appended to the uploaded
        certificate.
        """
        logger.info("Uploading Cert {} to {} & {}".format(self, remote_cert, remote_key))
        if concat_ca:
            f = StringIO(self.cert_data + self.ca.cert_data)
            c.put(local=f, remote=remote_cert)
        else:
            c.put(local=self.cert_path, remote=remote_cert)
        c.put(local=self.key_path, remote=remote_key)

    def upload_pki(self, c, pki, concat_ca=False):
        """Like upload(), taking the remote paths from pki['cert'] and pki['key']."""
        self.upload(c, pki['cert'], pki['key'], concat_ca)

View File

@ -0,0 +1,12 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "go_default_library",
srcs = [
"certs.go",
"generator.go",
"x509.go",
],
importpath = "code.hackerspace.pl/hscloud/cluster/clustercfg/certs",
visibility = ["//visibility:public"],
)

View File

@ -0,0 +1,288 @@
package certs
import (
"net"
"time"
)
// Certificates is the set of certificates required to run our Kubernetes
// production.
type Certificates struct {
// CAs are the self-signed root certificate authorities.
CAs CAs
// ProdviderIntermediateCA is the prodvider intermediate CA. As built by
// Prepare, it is issued by CAs.Kube.
ProdviderIntermediateCA *Certificate
// Global are the certificates that are not tied to any particular node.
Global Global
// PerNode are the certificates generated for each node. As built by Prepare,
// the map is keyed by node FQDN.
PerNode map[string]PerNode
}
// ensurer is anything whose Ensure method can be called to check/generate its
// state. It lets the Ensure methods in this file iterate over single
// certificates and groups of certificates uniformly.
type ensurer interface {
Ensure() error
}
// Ensure checks that all the Kubernetes production certificates and keys are
// present on disk, generating them as necessary.
//
// If the user has not decrypted cluster/secrets, an error will be returned.
// However, deeper sync checks are not currently performed.
//
// Sub-groups are processed in order (CAs, prodvider intermediate, global,
// then per-node in map iteration order) and the first error is returned.
func (c *Certificates) Ensure() error {
	sub := []ensurer{
		&c.CAs,
		c.ProdviderIntermediateCA,
		&c.Global,
	}
	for fqdn := range c.PerNode {
		// Take the address of a fresh per-iteration copy. Taking the address
		// of the range variable itself makes every appended pointer alias the
		// same variable on Go versions before 1.22, so only the last visited
		// node would ever be ensured.
		pn := c.PerNode[fqdn]
		sub = append(sub, &pn)
	}
	for _, s := range sub {
		if err := s.Ensure(); err != nil {
			return err
		}
	}
	return nil
}
// CAs are the root certificate authorities we use.
type CAs struct {
// EtcdPeer is used by etcd member nodes to authenticate peers.
EtcdPeer *Certificate
// Etcd is used by etcd member nodes to authenticate clients (ie.
// kube-apiservers).
Etcd *Certificate
// Kube is the main Kubernetes 'identity' CA, used to identify components
// and users.
Kube *Certificate
// KubeFront is the proxy/aggregation CA used by external apiservices to
// authenticate incoming apiserver connections.
KubeFront *Certificate
// Admitomatic is the CA used by the admitomatic webhook to authenticate
// incoming apiserver connections.
Admitomatic *Certificate
}
// Ensure makes sure every root CA has its key and certificate on disk,
// returning the first error encountered.
func (c *CAs) Ensure() error {
	roots := []*Certificate{c.EtcdPeer, c.Etcd, c.Kube, c.KubeFront, c.Admitomatic}
	for i := 0; i < len(roots); i++ {
		err := roots[i].Ensure()
		if err != nil {
			return err
		}
	}
	return nil
}
// Global are all the non-per-node certificates we use.
type Global struct {
// EtcdKube is used by kubernetes apiservers to authenticate to etcd
// members.
EtcdKube *Certificate
// KubeApiserver is used by kubernetes apiservers to authenticate to other
// kubernetes components/users.
KubeApiserver *Certificate
// KubeControllerManager is used by kubernetes controller managers to
// authenticate to the kubernetes apiservers.
KubeControllerManager *Certificate
// KubeScheduler is used by the kubernetes schedulers to authenticate to
// the kubernetes apiservers.
KubeScheduler *Certificate
// KubefrontApiserver is used by the kubernetes apiserver to authenticate
// to external apiservices.
KubefrontApiserver *Certificate
// AdmitomaticWebhook is used by the admitomatic webhook to authenticate to
// the Kubernetes apiservers.
AdmitomaticWebhook *Certificate
}
// Ensure makes sure every global (non-per-node) certificate has its key and
// certificate on disk, returning the first error encountered.
func (g *Global) Ensure() error {
	globals := []*Certificate{
		g.EtcdKube,
		g.KubeApiserver,
		g.KubeControllerManager,
		g.KubeScheduler,
		g.KubefrontApiserver,
		g.AdmitomaticWebhook,
	}
	for i := 0; i < len(globals); i++ {
		err := globals[i].Ensure()
		if err != nil {
			return err
		}
	}
	return nil
}
// MakeKubeEmergencyCreds describes a client certificate named "emergency",
// valid for seven days and issued by the main Kube CA, for the "admin" common
// name in the system:masters organization. Its files are stored under root.
//
// The breadcrumb is added as an extra DNS alternate name next to "admin".
// Nothing is generated until Ensure is called on the returned Certificate.
func (c *Certificates) MakeKubeEmergencyCreds(root, breadcrumb string) *Certificate {
	const week = 7 * 24 * time.Hour

	creds := new(Certificate)
	creds.name = "emergency"
	creds.duration = week
	creds.root = root
	creds.kind = kindClient
	creds.cn = "admin"
	creds.san = []string{"admin", breadcrumb}
	creds.o = "system:masters"
	creds.issuer = c.CAs.Kube
	return creds
}
// PerNode are all the per-node certificates we use. One PerNode exists for
// each node passed to Prepare.
type PerNode struct {
// EtcdPeer is used by etcd members to authenticate to other etcd members.
EtcdPeer *Certificate
// EtcdClient is used by etcd members to authenticate to their clients.
EtcdClient *Certificate
// Kubelet is used by kubelets to authenticate to other kubernetes
// components.
Kubelet *Certificate
}
// Ensure makes sure all certificates of a single node have their keys and
// certificates on disk, returning the first error encountered.
func (p *PerNode) Ensure() error {
	nodeCerts := []*Certificate{p.EtcdPeer, p.EtcdClient, p.Kubelet}
	for i := 0; i < len(nodeCerts); i++ {
		err := nodeCerts[i].Ensure()
		if err != nil {
			return err
		}
	}
	return nil
}
// mkCA describes a CA certificate with the given file name and subject common
// name, stored under root. No issuer is set, so it will be self-signed.
func mkCA(root, name, cn string) *Certificate {
	ca := &Certificate{kind: kindCA}
	ca.name, ca.root, ca.cn = name, root, cn
	return ca
}
// Prepare builds our Certificates structure at a given location on the
// filesystem, for the given nodes.
//
// root is the directory under which all keys and certificates are stored, and
// fqdns are the fully qualified names of the nodes to build per-node
// certificates for (it may be nil, in which case PerNode stays empty).
//
// Calling Ensure() on the returned Certificates will actually engage
// generation logic. Before that, no disk accesses are performed.
func Prepare(root string, fqdns []string) Certificates {
// Root CAs first: everything below references them as issuers.
certs := Certificates{
CAs: CAs{
EtcdPeer: mkCA(root, "ca-etcdpeer", "etcd peer ca"),
Etcd: mkCA(root, "ca-etcd", "etcd ca"),
Kube: mkCA(root, "ca-kube", "kubernetes main CA"),
KubeFront: mkCA(root, "ca-kubefront", "kubernetes frontend CA"),
Admitomatic: mkCA(root, "ca-admitomatic", "admitomatic webhook CA"),
},
PerNode: make(map[string]PerNode),
}
// Intermediate CA issued by the main Kube CA.
certs.ProdviderIntermediateCA = &Certificate{
name: "ca-kube-prodvider",
root: root,
kind: kindProdvider,
cn: "kubernetes prodvider intermediate",
issuer: certs.CAs.Kube,
}
// Certificates that exist once, independent of the node list.
certs.Global = Global{
EtcdKube: &Certificate{
name: "etcd-kube",
root: root,
kind: kindClient,
cn: "kube etcd client certificate",
san: []string{"kube"},
issuer: certs.CAs.Etcd,
},
KubeApiserver: &Certificate{
name: "kube-apiserver",
root: root,
kind: kindClientServer,
cn: "k0.hswaw.net",
san: []string{
"k0.hswaw.net",
"kubernetes.default.svc.k0.hswaw.net",
},
ips: []net.IP{
{10, 10, 12, 1},
},
issuer: certs.CAs.Kube,
},
KubeControllerManager: &Certificate{
name: "kube-controllermanager",
root: root,
kind: kindClientServer,
cn: "system:kube-controller-manager",
san: []string{"system:kube-controller-manager"},
o: "system:kube-controller-manager",
issuer: certs.CAs.Kube,
},
KubeScheduler: &Certificate{
name: "kube-scheduler",
root: root,
kind: kindClientServer,
cn: "system:kube-scheduler",
san: []string{"system:kube-scheduler"},
o: "system:kube-scheduler",
issuer: certs.CAs.Kube,
},
KubefrontApiserver: &Certificate{
name: "kubefront-apiserver",
root: root,
kind: kindClientServer,
cn: "Kubernetes Frontend",
san: []string{"apiserver"},
issuer: certs.CAs.KubeFront,
},
AdmitomaticWebhook: &Certificate{
name: "admitomatic-webhook",
root: root,
kind: kindServer,
cn: "Admitomatic Webhook",
san: []string{"admitomatic.admitomatic.svc"},
issuer: certs.CAs.Admitomatic,
},
}
// Per-node certificates, keyed by FQDN. File names embed the FQDN.
for _, fqdn := range fqdns {
certs.PerNode[fqdn] = PerNode{
EtcdPeer: &Certificate{
name: "etcdpeer-" + fqdn,
root: root,
kind: kindClientServer,
cn: "node etcd peer certificate",
san: []string{fqdn},
issuer: certs.CAs.EtcdPeer,
},
EtcdClient: &Certificate{
name: "etcd-" + fqdn,
root: root,
// etcd seems to need client too, as it's connecting to itself
// for... some reason?
// https://github.com/etcd-io/etcd/issues/9785
kind: kindClientServer,
cn: "node etcd server certificate",
san: []string{fqdn},
issuer: certs.CAs.Etcd,
},
Kubelet: &Certificate{
name: "kube-kubelet-" + fqdn,
root: root,
kind: kindClientServer,
cn: "system:node:" + fqdn,
o: "system:nodes",
san: []string{
"system:node:" + fqdn,
fqdn,
},
issuer: certs.CAs.Kube,
},
}
}
return certs
}

View File

@ -0,0 +1,297 @@
package certs
import (
"bytes"
"crypto"
"crypto/ed25519"
"crypto/rand"
"crypto/x509"
"encoding/pem"
"errors"
"fmt"
"log"
"math/big"
"net"
"os"
"path/filepath"
"time"
)
// Certificate is a higher-level descriptor of an intent to generate a
// certificate and corresponding Ed25519 keypair on disk.
type Certificate struct {
// unique name for this cert, used to calculate filesystem paths.
name string
// root directory where all certs are stored.
root string
// duration used to determine NotAfter. If not set, the certificate will
// never expire.
duration time.Duration
// kind selects the key usages / CA flags of the certificate, and the file
// extension of the certificate on disk.
kind certificateKind
// cn is the subject common name that's going to be produced in the X.509
// certificate.
cn string
// o is the subject organization that's going to be produced in the X.509
// certificate. If empty, no organization is set.
o string
// san are the DNS alternate names that are going to be produced in the
// X.509 certificate.
san []string
// ips are the IP alternate names that are going to be produced in the
// X.509 certificate.
ips []net.IP
// issuer, if set, is the certificate that will sign this certificate. If
// not set, the certificate will be self-signed.
issuer *Certificate
}
// Paths reports where this Certificate's files live on the local filesystem:
// the issuing CA's certificate, this certificate, and this certificate's
// private key, in that order. For a self-signed certificate (no issuer) the
// CA path is empty.
//
// The files are not guaranteed to exist - call Ensure first to create them.
func (c *Certificate) Paths() (caPath, certPath, keyPath string) {
	if issuer := c.issuer; issuer != nil {
		caPath = issuer.path(fileKindCert)
	}
	return caPath, c.path(fileKindCert), c.path(fileKindKey)
}
// certificateKind selects which key usages, extended key usages and CA flags
// template() sets on the generated X.509 certificate.
type certificateKind string
const (
// kindServer certificates get the server auth extended key usage.
kindServer certificateKind = "server"
// kindClient certificates get the client auth extended key usage.
kindClient certificateKind = "client"
// kindClientServer certificates get both client and server auth.
kindClientServer certificateKind = "client-server"
// kindCA certificates are CAs. Their certificate files end in .crt.
kindCA certificateKind = "ca"
// kindProdvider certificates are CAs that additionally get the key
// encipherment key usage.
kindProdvider certificateKind = "prodvider"
)
// fileKind identifies one of the files belonging to a Certificate; see path().
type fileKind string
const (
// fileKindKey is the plaintext private key under secrets/plain.
fileKindKey fileKind = "key"
// fileKindKeyEncrypted is the private key under secrets/cipher. This code
// only checks for its existence.
fileKindKeyEncrypted fileKind = "key-encrypted"
// fileKindCert is the PEM certificate under certs.
fileKindCert fileKind = "cert"
)
// path computes where the file of kind k for this Certificate lives under
// c.root. It panics when given a fileKind it does not know about.
func (c *Certificate) path(k fileKind) string {
	if k == fileKindCert {
		// clustercfg.py compat: CA certs end in .crt, non-CA certs end in
		// .cert. We're keeping this accidental convention to avoid spurious
		// nix rebuilds when migrating.
		//
		// Feel free to fix it if it annoys you.
		ext := ".cert"
		if c.kind == kindCA {
			ext = ".crt"
		}
		return filepath.Join(c.root, "certs", c.name+ext)
	}

	// Both key flavours share a file name and differ only in the directory.
	secretDirs := map[fileKind]string{
		fileKindKeyEncrypted: "cipher",
		fileKindKey:          "plain",
	}
	dir, known := secretDirs[k]
	if !known {
		panic("unexpected file kind type " + k)
	}
	return filepath.Join(c.root, "secrets", dir, c.name+".key")
}
// ensureKey returns the private key of this Certificate, loading it from the
// plaintext key file if that exists and generating (and saving) a new one
// otherwise.
//
// If the plaintext key is missing but an encrypted copy is present, nothing
// is generated and an error asking the user to decrypt is returned.
func (c *Certificate) ensureKey() (crypto.Signer, error) {
	plain := c.path(fileKindKey)

	_, statErr := os.Stat(plain)
	if statErr == nil {
		return c.loadKey()
	}
	if !errors.Is(statErr, os.ErrNotExist) {
		return nil, fmt.Errorf("could not read key: %w", statErr)
	}

	encrypted := c.path(fileKindKeyEncrypted)
	if _, encErr := os.Stat(encrypted); encErr == nil {
		return nil, fmt.Errorf("plaintext key at %q not found, but exists encrypted at %q - please decrypt using secretstore", plain, encrypted)
	}
	return c.generateKey()
}
// loadKey reads the plaintext key file of this Certificate, which must hold a
// single PEM "PRIVATE KEY" block containing a PKCS#8 encoded Ed25519 key.
func (c *Certificate) loadKey() (crypto.Signer, error) {
	raw, err := os.ReadFile(c.path(fileKindKey))
	if err != nil {
		return nil, err
	}

	block, _ := pem.Decode(raw)
	switch {
	case block == nil:
		return nil, fmt.Errorf("no PEM block found")
	case block.Type != "PRIVATE KEY":
		return nil, fmt.Errorf("unexpected PEM block: %q", block.Type)
	}

	parsed, err := x509.ParsePKCS8PrivateKey(block.Bytes)
	if err != nil {
		return nil, err
	}
	edKey, isEd := parsed.(ed25519.PrivateKey)
	if !isEd {
		return nil, fmt.Errorf("not an ED25519 key")
	}
	return edKey, nil
}
// generateKey creates a fresh Ed25519 private key and saves it, PKCS#8 encoded
// in a PEM "PRIVATE KEY" block, to the plaintext key path of this Certificate.
// The parent directory is created (mode 0700) if needed and the file is
// written with mode 0600.
func (c *Certificate) generateKey() (crypto.Signer, error) {
	_, priv, err := ed25519.GenerateKey(rand.Reader)
	if err != nil {
		return nil, err
	}
	pkcs8, err := x509.MarshalPKCS8PrivateKey(priv)
	if err != nil {
		return nil, err
	}
	block := pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: pkcs8})

	path := c.path(fileKindKey)
	// Report directory creation failures directly instead of letting them
	// surface as a less helpful error from WriteFile.
	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
		return nil, fmt.Errorf("could not create key directory: %w", err)
	}
	log.Printf("Saving %s key to %s ...", c.name, path)
	if err := os.WriteFile(path, block, 0600); err != nil {
		return nil, err
	}
	return priv, nil
}
// ensureCert returns the X.509 certificate of this Certificate. An existing
// certificate file is loaded and validated; a missing or (nearly) expired one
// is regenerated and saved. Any other load or validation failure is returned
// as an error without regenerating.
func (c *Certificate) ensureCert() (*x509.Certificate, error) {
	_, statErr := os.Stat(c.path(fileKindCert))
	if errors.Is(statErr, os.ErrNotExist) {
		return c.generateCert()
	}
	if statErr != nil {
		return nil, fmt.Errorf("could not read cert: %w", statErr)
	}

	loaded, loadErr := c.loadCert()
	if loadErr == errExpired {
		return c.generateCert()
	}
	if loadErr != nil {
		return nil, loadErr
	}
	return loaded, nil
}
// generateCert issues a new X.509 certificate for this Certificate and saves
// it PEM-encoded to the certificate path, overwriting any existing file.
//
// The private key is loaded or generated through ensureKey. If an issuer is
// set, the issuer's key and certificate are ensured too and used for signing;
// otherwise the certificate is self-signed. If no duration is set, NotAfter
// is set to unknownNotAfter.
func (c *Certificate) generateCert() (*x509.Certificate, error) {
	serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 127)
	serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
	if err != nil {
		return nil, err
	}

	// Sample the clock once so that NotBefore and NotAfter are derived from
	// the same instant.
	now := time.Now()
	notAfter := unknownNotAfter
	if c.duration != 0 {
		notAfter = now.Add(c.duration)
	}

	template := c.template()
	template.SerialNumber = serialNumber
	template.NotBefore = now
	template.NotAfter = notAfter

	skey, err := c.ensureKey()
	if err != nil {
		return nil, fmt.Errorf("when ensuring key: %w", err)
	}
	pkey := skey.Public()

	// Self-signed unless an issuer is configured.
	parent := template
	caskey := skey
	if c.issuer != nil {
		caskey, err = c.issuer.ensureKey()
		if err != nil {
			return nil, fmt.Errorf("when ensuring CA key: %w", err)
		}
		cacert, err := c.issuer.ensureCert()
		if err != nil {
			return nil, fmt.Errorf("when ensuring CA cert: %w", err)
		}
		parent = cacert
	}

	der, err := x509.CreateCertificate(rand.Reader, template, parent, pkey, caskey)
	if err != nil {
		return nil, fmt.Errorf("issuing certificate failed: %w", err)
	}
	block := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: der})

	path := c.path(fileKindCert)
	// Report directory creation failures directly instead of letting them
	// surface as a less helpful error from WriteFile.
	if err := os.MkdirAll(filepath.Dir(path), 0700); err != nil {
		return nil, fmt.Errorf("could not create cert directory: %w", err)
	}
	log.Printf("Saving %s cert to %s ...", c.name, path)
	if err := os.WriteFile(path, block, 0600); err != nil {
		return nil, err
	}
	return x509.ParseCertificate(der)
}
// errExpired is returned if the cert exists on disk but has (nearly) expired.
var errExpired = errors.New("certificate expired")
// loadCert reads and validates the on-disk certificate of this Certificate.
//
// It returns errExpired if the certificate expires within the next hour.
// Other errors are returned if the file is not a single PEM "CERTIFICATE"
// block, is not issued for an Ed25519 key, is issued for a key other than the
// one returned by ensureKey, or differs from what template() would produce.
func (c *Certificate) loadCert() (*x509.Certificate, error) {
	raw, err := os.ReadFile(c.path(fileKindCert))
	if err != nil {
		return nil, err
	}

	block, _ := pem.Decode(raw)
	switch {
	case block == nil:
		return nil, fmt.Errorf("no PEM block found")
	case block.Type != "CERTIFICATE":
		return nil, fmt.Errorf("unexpected PEM block: %q", block.Type)
	}

	loaded, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return nil, err
	}

	// Treat certificates with less than an hour of validity left as expired.
	deadline := time.Now().Add(time.Hour)
	if loaded.NotAfter.Before(deadline) {
		return nil, errExpired
	}

	certPub, isEd := loaded.PublicKey.(ed25519.PublicKey)
	if !isEd {
		return nil, fmt.Errorf("not a ED25519 cert")
	}

	signer, err := c.ensureKey()
	if err != nil {
		return nil, fmt.Errorf("when ensuring key: %w", err)
	}
	keyPub := signer.Public().(ed25519.PublicKey)
	if !bytes.Equal(certPub, keyPub) {
		return nil, fmt.Errorf("issued for different key")
	}

	if err := compareCertData(c.template(), loaded); err != nil {
		return nil, err
	}
	return loaded, nil
}
// Ensure makes sure the given Certificate has a private key and an X.509
// certificate on disk, generating them as necessary. When a certificate has
// to be issued, the key and certificate of its issuer are ensured as well.
func (c *Certificate) Ensure() error {
	if _, err := c.ensureCert(); err != nil {
		return fmt.Errorf("when ensuring cert %s: %w", c.name, err)
	}
	return nil
}

View File

@ -0,0 +1,98 @@
package certs
import (
"bytes"
"crypto/x509"
"crypto/x509/pkix"
"fmt"
"strings"
"time"
)
var (
// unknownNotAfter is the NotAfter value used for certificates that have no
// duration set, ie. that should never expire. It is 9999-12-31T23:59:59Z.
//
// From RFC 5280 Section 4.1.2.5
unknownNotAfter = time.Unix(253402300799, 0)
)
// compareCertData returns an error if any of the 'important' bits of the two
// certificates differ. Those are the bits that we template ourselves, and that
// are not issue-dependent (ie. not time or serial or kid or ...).
func compareCertData(template, cert *x509.Certificate) error {
if want, got := template.Subject.String(), cert.Subject.String(); want != got {
return fmt.Errorf("issued for different subject, wanted %s, got %s", want, got)
}
if want, got := strings.Join(template.DNSNames, ","), strings.Join(cert.DNSNames, ","); want != got {
return fmt.Errorf("issued for different DNS names, wanted %s, got %s", want, got)
}
if want, got := len(template.IPAddresses), len(cert.IPAddresses); want != got {
return fmt.Errorf("issued for different IP addresses, wanted %v, got %v", want, got)
} else {
for i := 0; i < len(template.IPAddresses); i++ {
if want, got := template.IPAddresses[i], cert.IPAddresses[i]; !bytes.Equal(want, got) {
return fmt.Errorf("issued for different IP addresses, wanted %v, got %v", want, got)
}
}
}
if want, got := template.KeyUsage, cert.KeyUsage; want != got {
return fmt.Errorf("issued for different key usage, wanted %d, got %d", want, got)
}
if want, got := len(template.ExtKeyUsage), len(cert.ExtKeyUsage); want != got {
return fmt.Errorf("issued for different ext key usage, wanted %v, got %v", want, got)
} else {
for i := 0; i < len(template.ExtKeyUsage); i++ {
if want, got := template.ExtKeyUsage[i], cert.ExtKeyUsage[i]; want != got {
return fmt.Errorf("issued for different ext key usage, wanted %v, got %v", want, got)
}
}
}
if want, got := template.IsCA, cert.IsCA; want != got {
return fmt.Errorf("issued for different IsCA, wanted %v, got %v", want, got)
}
if want, got := template.BasicConstraintsValid, cert.BasicConstraintsValid; want != got {
return fmt.Errorf("issued for different basic constraints valid, wanted %v, got %v", want, got)
}
return nil
}
// template turns the data held in this Certificate into an x509.Certificate
// usable as a signing template: subject, DNS/IP alternate names, and the key
// usages / CA flags implied by the certificate kind. Per-issue fields (times,
// serial number, etc.) are left unset. A kind not handled below gets no usages.
func (c *Certificate) template() *x509.Certificate {
	const (
		leafUsage = x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment
		caUsage   = x509.KeyUsageCertSign | x509.KeyUsageCRLSign | x509.KeyUsageDigitalSignature
	)

	subject := pkix.Name{CommonName: c.cn}
	if c.o != "" {
		subject.Organization = []string{c.o}
	}
	tmpl := &x509.Certificate{
		Subject:     subject,
		DNSNames:    c.san,
		IPAddresses: c.ips,
	}

	switch c.kind {
	case kindServer:
		tmpl.KeyUsage = leafUsage
		tmpl.ExtKeyUsage = []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}
	case kindClient:
		tmpl.KeyUsage = leafUsage
		tmpl.ExtKeyUsage = []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}
	case kindClientServer:
		tmpl.KeyUsage = leafUsage
		tmpl.ExtKeyUsage = []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth}
	case kindCA, kindProdvider:
		tmpl.IsCA = true
		tmpl.BasicConstraintsValid = true
		tmpl.KeyUsage = caUsage
		if c.kind == kindProdvider {
			tmpl.KeyUsage |= x509.KeyUsageKeyEncipherment
		}
		tmpl.ExtKeyUsage = []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth, x509.ExtKeyUsageOCSPSigning}
		// SubjectKeyId is never set on this template, so AuthorityKeyId
		// (which mirrors it) stays unset as well.
	}
	return tmpl
}

View File

@ -1,221 +0,0 @@
#!/usr/bin/env python
from builtins import object
import datetime
from io import BytesIO
import json
import logging
import os
import tempfile
import subprocess
import sys
from cryptography import x509
from cryptography.hazmat.backends import default_backend
import fabric
from tools import secretstore
from tools.hscloud import lib as hscloud
import ca
local_root = hscloud.workspace_location()
cluster = 'k0.hswaw.net'
ss = secretstore.SecretStore(
plain_root=os.path.join(local_root, 'cluster/secrets/plain'),
cipher_root=os.path.join(local_root, 'cluster/secrets/cipher'))
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter('%(levelname)s - %(message)s')
sh = logging.StreamHandler()
sh.setFormatter(formatter)
logger.addHandler(sh)
def configure_k8s(username, ca, cert, key):
    """
    Point kubectl at the cluster using the given admin credentials.

    Runs four `kubectl config` commands in order: define the
    `admin.<cluster>` cluster (embedding the CA at path `ca`), define the
    `username` credentials (embedding `cert` and `key`), define an
    `admin.<cluster>` context tying the two together, and switch to that
    context. Raises subprocess.CalledProcessError if any command fails.
    """
    def kubectl_config(*arguments):
        subprocess.check_call(['kubectl', 'config'] + list(arguments))

    kubectl_config(
        'set-cluster', 'admin.' + cluster,
        '--certificate-authority=' + ca,
        '--embed-certs=true',
        '--server=https://' + cluster + ':4001',
    )
    kubectl_config(
        'set-credentials', username,
        '--client-certificate=' + cert,
        '--client-key=' + key,
        '--embed-certs=true',
    )
    kubectl_config(
        'set-context', 'admin.' + cluster,
        '--cluster=' + 'admin.' + cluster,
        '--user=' + username,
    )
    kubectl_config('use-context', 'admin.' + cluster)
def admincreds(args):
    """
    Generate (or reuse) a system:masters admin certificate and configure
    kubectl to use it.

    args must hold exactly one element, the username. The key and
    certificate are kept in `<workspace>/.kubectl/admin.{key,crt}` and are
    regenerated when either is missing or when the certificate has less than
    24 hours of validity left.

    Returns 1 on a usage error, None otherwise.
    """
    if len(args) != 1:
        sys.stderr.write("Usage: admincreds q3k\n")
        return 1
    username = args[0]

    print("")
    print("WARNING WARNING WARNING WARNING WARNING WARNING")
    print("===============================================")
    print("")
    print("You are requesting ADMIN credentials.")
    print("")
    print("You likely shouldn't be doing this, and")
    print("instead should be using `prodaccess`.")
    print("")
    print("===============================================")
    print("WARNING WARNING WARNING WARNING WARNING WARNING")
    print("")

    ## Make kube certificates.
    certs_root = os.path.join(local_root, 'cluster/certs')
    ca_kube = ca.CA(ss, certs_root, 'kube', 'kubernetes main CA')

    local_key = os.path.join(local_root, '.kubectl/admin.key')
    local_crt = os.path.join(local_root, '.kubectl/admin.crt')

    kubectl = os.path.join(local_root, '.kubectl')
    if not os.path.exists(kubectl):
        os.mkdir(kubectl)

    generate_cert = False
    if not os.path.exists(local_key):
        generate_cert = True

    if os.path.exists(local_crt):
        with open(local_crt, 'rb') as f:
            b = f.read()
        cert = x509.load_pem_x509_certificate(b, default_backend())
        # not_valid_after is a naive datetime expressed in UTC, so it must be
        # compared against naive UTC "now". Comparing against local time
        # skews the remaining validity by the local UTC offset.
        delta = cert.not_valid_after - datetime.datetime.utcnow()
        logger.info("admin: existing cert expiry: {}".format(delta))
        if delta.total_seconds() < 3600 * 24:
            logger.info("admin: expires soon, regenerating")
            generate_cert = True
    else:
        generate_cert = True

    if not generate_cert:
        return configure_k8s(username, ca_kube._cert, local_crt, local_key)

    key, csr = ca_kube.gen_key(hosts=['admin', username], o='system:masters', ou='Kube Admin Account')
    crt = ca_kube.sign(csr)

    with open(local_key, 'w') as f:
        f.write(key)

    with open(local_crt, 'w') as f:
        f.write(crt)

    configure_k8s(username, ca_kube._cert, local_crt, local_key)
def nodestrap(args, nocerts=False):
if len(args) != 1:
sys.stderr.write("Usage: nodestrap bc01n01.hswaw.net\n")
return 1
fqdn = args[0]
logger.info("Nodestrapping {}...".format(fqdn))
r = fabric.Connection('root@{}'.format(fqdn))
if not nocerts:
certs_root = os.path.join(local_root, 'cluster/certs')
# Make etcd peer certificate for node.
ca_etcd_peer = ca.CA(ss, certs_root, 'etcdpeer', 'etcd peer ca')
ca_etcd_peer.make_cert('etcdpeer-{}'.format(fqdn), hosts=[fqdn], ou='node etcd peer certificate')
# Make etcd server certificate for node and client certificate for kube.
ca_etcd = ca.CA(ss, certs_root, 'etcd', 'etcd ca')
ca_etcd.make_cert('etcd-{}'.format(fqdn), hosts=[fqdn], ou='node etcd server certificate')
ca_etcd.make_cert('etcd-kube', hosts=['kube'], ou='kube etcd client certificate')
ca_etcd.make_cert('etcd-root', hosts=['root'], ou='root etcd client certificate')
ca_etcd.make_cert('etcd-calico', hosts=['calico'], ou='root etcd client certificate')
## Make kube certificates.
ca_kube = ca.CA(ss, certs_root, 'kube', 'kubernetes main CA')
# Make prodvider intermediate CA.
ca_kube.make_cert('ca-kube-prodvider', o='Warsaw Hackerspace', ou='kubernetes prodvider intermediate', hosts=['kubernetes prodvider intermediate CA'], profile='intermediate').ensure()
# Make kubelet certificate (per node).
ca_kube.make_cert('kube-kubelet-'+fqdn, o='system:nodes', ou='Kubelet', hosts=['system:node:'+fqdn, fqdn])
# Make apiserver certificate.
ca_kube.make_cert('kube-apiserver', ou='Kubernetes API', hosts=[cluster, 'kubernetes.default.svc.'+cluster, '10.10.12.1'])
# Make service accounts decryption key (as cert for consistency).
ca_kube.make_cert('kube-serviceaccounts', ou='Kubernetes Service Accounts Signer', hosts=['serviceaccounts'])
# Make kube component certificates.
kube_components = ['controllermanager', 'scheduler', 'proxy']
for k in kube_components:
# meh
if k == 'controllermanager':
o = 'system:kube-controller-manager'
else:
o = 'system:kube-'+k
ou = 'Kubernetes Component '+k
c = ca_kube.make_cert('kube-'+k, ou=ou, o=o, hosts=[o,])
## Make kubefront certificates.
ca_kubefront = ca.CA(ss, certs_root, 'kubefront', 'kubernetes frontend CA')
ca_kubefront.make_cert('kubefront-apiserver', ou='Kubernetes Frontend', hosts=['apiserver'])
## Make admitomatic (admission controller) certificates.
ca_admitomatic = ca.CA(ss, certs_root, 'admitomatic', 'admitomatic webhook CA')
ca_admitomatic.make_cert('admitomatic-webhook', ou='Admitomatic Webhook', hosts=['admitomatic.admitomatic.svc'])
toplevel = subprocess.check_output([
"nix-build",
local_root,
"-A", "ops.machines.\"" + fqdn + "\".config.passthru.hscloud.provision",
]).decode().strip()
subprocess.check_call([toplevel])
def usage():
    """Print the one-line command synopsis to standard error."""
    print("Usage: clustercfg <nodestrap|admincreds>\n", end="", file=sys.stderr)
def main():
    """Dispatch on the first command-line argument.

    Returns the exit status: whatever the selected subcommand returns, 0 for
    the smoke test, or 1 (after printing usage) when the mode is missing or
    not recognised.
    """
    argv = sys.argv
    if len(argv) < 2:
        usage()
        return 1

    mode, rest = argv[1], argv[2:]

    if mode == "smoketest":
        sys.stdout.write("Smoke test passed.")
        return 0
    if mode == "nodestrap":
        return nodestrap(rest)
    if mode == "nodestrap-nocerts":
        # Same entry point as nodestrap, with the nocerts flag set.
        return nodestrap(rest, nocerts=True)
    if mode == "admincreds":
        return admincreds(rest)

    # Anything else is an unknown mode.
    usage()
    return 1
if __name__ == '__main__':
    # main() may return None; treat any falsy result as a successful exit.
    exit_status = main() or 0
    sys.exit(exit_status)

View File

@ -0,0 +1,109 @@
package main
import (
"fmt"
"log"
"os"
"os/user"
"path/filepath"
"github.com/spf13/cobra"
"k8s.io/client-go/tools/clientcmd"
clientapi "k8s.io/client-go/tools/clientcmd/api"
"code.hackerspace.pl/hscloud/cluster/clustercfg/certs"
"code.hackerspace.pl/hscloud/go/workspace"
)
// admincredsCmd implements `clustercfg admincreds`. It prepares the cluster
// certificate set rooted at <workspace>/cluster, asks it for emergency
// credentials stored under <workspace>/.kubectl/admincreds, ensures they
// exist, and then registers them with kubectl under the "emergency.k0"
// context.
var admincredsCmd = &cobra.Command{
	Use:   "admincreds",
	Short: "Acquire emergency Kubernetes credentials",
	Long: `
Use secretstore secrets to generate a Kubernetes system:masters keypair and
certificate. Only for use in emergencies.
Your local username and hostname will make part of the cert and can be used
for auditing of accesses to apiservers.
`,
	Run: func(cmd *cobra.Command, args []string) {
		ws, err := workspace.Get()
		if err != nil {
			log.Fatalf("Could not figure out workspace: %v", err)
		}

		// Best effort: if the local user or hostname cannot be determined we
		// still issue credentials, just with an "UNKNOWN" placeholder.
		uname := "UNKNOWN"
		if u, err := user.Current(); err == nil {
			uname = u.Username
		}
		hostname := "UNKNOWN"
		if h, err := os.Hostname(); err == nil {
			hostname = h
		}
		// user@host identifier handed to the cert generator (per the help
		// text above, it ends up in the certificate for auditing).
		breadcrumb := fmt.Sprintf("%s@%s", uname, hostname)

		root := filepath.Join(ws, "cluster")
		path := filepath.Join(ws, ".kubectl", "admincreds")

		// No node FQDNs are needed here, hence the nil second argument.
		c := certs.Prepare(root, nil)
		creds := c.MakeKubeEmergencyCreds(path, breadcrumb)

		log.Printf("")
		log.Printf("WARNING WARNING WARNING WARNING WARNING WARNING")
		log.Printf("===============================================")
		log.Printf("")
		log.Printf("You are requesting ADMIN credentials.")
		log.Printf("")
		log.Printf("You likely shouldn't be doing this, and")
		log.Printf("instead should be using `prodaccess`.")
		log.Printf("")
		log.Printf("===============================================")
		log.Printf("WARNING WARNING WARNING WARNING WARNING WARNING")
		log.Printf("")

		log.Printf("Issuing certs...")
		if err := creds.Ensure(); err != nil {
			log.Fatalf("Failed: %v", err)
		}

		log.Printf("Configuring kubectl...")
		caPath, certPath, keyPath := creds.Paths()
		if err := installKubeletConfig(caPath, certPath, keyPath, "emergency.k0"); err != nil {
			log.Fatalf("Failed: %v", err)
		}

		// Success is reported with Printf, not Fatalf: Fatalf calls
		// os.Exit(1), which would make a successful run look like a failure
		// to shells and scripts.
		log.Printf("Done. Use kubectl --context=emergency.k0")
	},
}
// installKubeletConfig adds (or overwrites) a user, cluster and context entry,
// all keyed by configName, in the kubeconfig found through clientcmd's default
// path options, then writes the result back.
//
// Despite the name, this edits the kubectl client configuration. The cluster
// entry always points at the hard-coded k0 API server address, with caPath as
// its certificate authority; the user entry authenticates with certPath and
// keyPath. Other entries already present in the loaded config are kept.
//
// It returns a wrapped error if the starting config cannot be loaded or the
// modified config cannot be persisted.
func installKubeletConfig(caPath, certPath, keyPath, configName string) error {
	pathOpts := clientcmd.NewDefaultPathOptions()

	cfg, err := pathOpts.GetStartingConfig()
	if err != nil {
		return fmt.Errorf("getting initial config failed: %w", err)
	}

	user := clientapi.AuthInfo{
		ClientCertificate: certPath,
		ClientKey:         keyPath,
	}
	cluster := clientapi.Cluster{
		CertificateAuthority: caPath,
		Server:               "https://k0.hswaw.net:4001",
	}
	kctx := clientapi.Context{
		AuthInfo:  configName,
		Cluster:   configName,
		Namespace: "default",
	}

	cfg.AuthInfos[configName] = &user
	cfg.Clusters[configName] = &cluster
	cfg.Contexts[configName] = &kctx

	err = clientcmd.ModifyConfig(pathOpts, *cfg, true)
	if err != nil {
		return fmt.Errorf("modifying config failed: %w", err)
	}
	return nil
}
// init registers the admincreds subcommand on the root clustercfg command.
func init() {
	rootCmd.AddCommand(admincredsCmd)
}

View File

@ -0,0 +1,63 @@
package main
import (
"log"
"path/filepath"
"strings"
"github.com/spf13/cobra"
"code.hackerspace.pl/hscloud/cluster/clustercfg/certs"
"code.hackerspace.pl/hscloud/go/workspace"
)
// flagFQDNs holds the values of the --fqdn flag of gencerts. When empty, the
// machine list is evaluated from Nix instead.
var flagFQDNs []string

// gencertsCmd implements `clustercfg gencerts`. It determines the set of
// machine FQDNs (from --fqdn, or by evaluating ops.exports.kubeMachineNames),
// validates them, and ensures all certs/keys under <workspace>/cluster exist.
var gencertsCmd = &cobra.Command{
	Use:   "gencerts",
	Short: "(re)generate keys/certs for k0 cluster",
	Long: `
If you're adding a new cluster node, run this. It will populate //cluster/secrets
and //cluster/certificates with new certs/keys.
By default, the nodes to generate certificates for are automatically discovered
by querying the local Nix machines defined in //ops, looking for anything that
has hscloud.kube.control.enable set. That can be slow and/or incorrect. To override
node names, set --fqdn (either comma-separate them or repeat flags).
`,
	Run: func(cmd *cobra.Command, args []string) {
		ws, err := workspace.Get()
		if err != nil {
			log.Fatalf("Could not figure out workspace: %v", err)
		}
		path := filepath.Join(ws, "cluster")

		fqdns := flagFQDNs
		if len(fqdns) == 0 {
			log.Printf("--fqdn not set, figuring out machines from Nix...")
			err = workspace.EvalHscloudNix(cmd.Context(), &fqdns, "ops.exports.kubeMachineNames")
			if err != nil {
				log.Fatalf("Could not figure out Kubernetes machine FQDNs: %v", err)
			}
		}

		// Only names of the exact form <host>.hswaw.net are accepted. The
		// host label must be non-empty, otherwise ".hswaw.net" would slip
		// through the three-part check.
		for _, fqdn := range fqdns {
			parts := strings.Split(fqdn, ".")
			if len(parts) != 3 || parts[0] == "" || parts[1] != "hswaw" || parts[2] != "net" {
				log.Fatalf("Invalid FQDN %q: must be xxx.hswaw.net.", fqdn)
			}
		}
		// Printed in a form that can be pasted back as a --fqdn override.
		log.Printf("Machines: --fqdn %s", strings.Join(fqdns, ","))

		c := certs.Prepare(path, fqdns)
		if err := c.Ensure(); err != nil {
			log.Fatalf("Failed: %v", err)
		}
		log.Printf("Done.")
	},
}
// init declares the --fqdn flag on gencerts and attaches the subcommand to the
// root clustercfg command.
func init() {
	const fqdnUsage = "List of machine FQDNs to generate certs for. If not set, will be automatically figured out from Nix modules in local checkout (slow)."

	flags := gencertsCmd.Flags()
	flags.StringSliceVar(&flagFQDNs, "fqdn", nil, fqdnUsage)

	rootCmd.AddCommand(gencertsCmd)
}

View File

@ -0,0 +1,20 @@
package main
import (
"fmt"
"os"
"github.com/spf13/cobra"
)
// rootCmd is the top-level `clustercfg` command. It has no Run function of its
// own; it is the command that main executes.
var rootCmd = &cobra.Command{
	Use:   "clustercfg",
	Short: "admin management tool for k0 cluster",
}
// main runs the root command. If execution returns an error, the error is
// written to standard error and the process exits with status 1.
func main() {
	err := rootCmd.Execute()
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}

View File

@ -29,8 +29,9 @@ Provisioning nodes
- bring up a new node with NixOS; the configuration doesn't matter and will be
nuked anyway
- edit cluster/nix/defs-machines.nix
- `bazel run //cluster/clustercfg nodestrap bc01nXX.hswaw.net`
- add machine to cluster/machines and ops/machines.nix
- generate certs with `bazel run //cluster/clustercfg gencerts`
- deploy using ops (see ops/README.md)
Applying kubecfg state
----------------------

14
ops/exports.nix Normal file
View File

@ -0,0 +1,14 @@
{ hscloud, pkgs, hscloudForPkgs, ... }:

{
  # Used by clustercfg to figure out which machines need kube certs.
  #
  # Evaluates to the list of attribute names in hscloud.ops.machines for which
  # the hscloud.kube.control.enable option exists and evaluates to true.
  kubeMachineNames = let
    inherit (pkgs.lib) filterAttrs;

    # True when the machine's option tree has an hscloud.kube subtree at all;
    # checked first so the enable lookup below is never made on a machine
    # without it.
    declaresKubeOptions = machine:
      (builtins.hasAttr "hscloud" machine.options) &&
      (builtins.hasAttr "kube" machine.options.hscloud);

    # The "__readTree" attribute is not a machine (presumably readTree
    # bookkeeping -- TODO confirm), so it is excluded before being inspected.
    wantsKubeCerts = name: machine:
      name != "__readTree" &&
      declaresKubeOptions machine &&
      machine.options.hscloud.kube.control.enable.value;
  in builtins.attrNames (filterAttrs wantsKubeCerts hscloud.ops.machines);
}

View File

@ -75,7 +75,7 @@ local kube = import "../../../kube/kube.libsonnet";
//
// When contacting the API server, we hardcode the 'hswaw.net' DNS suffix as
// our API server's TLS certificate only has a CN/SAN for its full FQDN, not
// the .svc.cluster.local shorthand (see //cluster/clustercfg:clustercfg.py).
// the .svc.cluster.local shorthand (see //cluster/clustercfg).
local kubeScrapeNodeMetrics = function(name, path) kubeScrapeConfig(name, "node") {
relabel_configs: [
{