// hscloud/cluster/kube/k0.libsonnet
// k0.hswaw.net kubernetes cluster
// This defines the cluster as a single object.
// Use the sibling k0*.jsonnet 'view' files to actually apply the configuration.
local kube = import "../../kube/kube.libsonnet";
local policies = import "../../kube/policies.libsonnet";
local cluster = import "cluster.libsonnet";
local admitomatic = import "lib/admitomatic.libsonnet";
local cockroachdb = import "lib/cockroachdb.libsonnet";
local registry = import "lib/registry.libsonnet";
local rook = import "lib/rook.libsonnet";
{
k0: {
local k0 = self,
cluster: cluster.Cluster("k0", "hswaw.net") {
cfg+: {
storageClassNameParanoid: k0.ceph.waw3Pools.blockRedundant.name,
},
metallb+: {
cfg+: {
// Peer with the Calico BIRD instance running on the same node, over
// 127.0.0.1 (both run with host networking). Calico owns the eBGP
// sessions to the switch fabric and re-announces the ExternalIP routes
// it learns from MetalLB (AS65002) via its own AS65003.
peers: [
{
"peer-address": "127.0.0.1",
"peer-asn": 65003,
"my-asn": 65002,
},
],
// Public IP address pools. Keep in sync with k0.calico.yaml.
addressPools: [
{
name: "public-v4-1",
protocol: "bgp",
addresses: [
"185.236.240.48/28",
],
},
{
name: "public-v4-2",
protocol: "bgp",
addresses: [
"185.236.240.112/28"
],
},
],
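// A LoadBalancer Service can request an address from a specific pool
// above via the standard MetalLB annotation. Illustrative sketch only,
// not part of this configuration:
//
//   metadata+: {
//       annotations+: {
//           "metallb.universe.tf/address-pool": "public-v4-1",
//       },
//   },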
},
},
},
// Docker registry
registry: registry.Environment {
cfg+: {
domain: "registry.%s" % [k0.cluster.fqdn],
storageClassName: k0.cluster.cfg.storageClassNameParanoid,
objectStorageName: "waw-hdd-redundant-3-object",
},
},
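// The registry ends up served at registry.k0.hswaw.net (per the domain
// template above), with its data kept in the waw-hdd-redundant-3-object
// radosgw store. Illustrative usage, not part of this configuration:
//
//   docker push registry.k0.hswaw.net/$project/$image:$tag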
// CockroachDB, running on bc01n{01,02} and dcr01s22 (see topology below).
cockroach: {
waw2: cockroachdb.Cluster("crdb-waw1") {
cfg+: {
topology: [
{ name: "bc01n01", node: "bc01n01.hswaw.net" },
{ name: "bc01n02", node: "bc01n02.hswaw.net" },
{ name: "dcr01s22", node: "dcr01s22.hswaw.net" },
],
// Host path on SSD.
hostPath: "/var/db/crdb-waw1",
extraDNS: [
"crdb-waw1.hswaw.net",
],
},
},
clients: {
cccampix: k0.cockroach.waw2.Client("cccampix"),
cccampixDev: k0.cockroach.waw2.Client("cccampix-dev"),
buglessDev: k0.cockroach.waw2.Client("bugless-dev"),
sso: k0.cockroach.waw2.Client("sso"),
herpDev: k0.cockroach.waw2.Client("herp-dev"),
gitea: k0.cockroach.waw2.Client("gitea"),
issues: k0.cockroach.waw2.Client("issues"),
dns: k0.cockroach.waw2.Client("dns"),
},
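// Each Client above provisions per-application access to crdb-waw1.
// Applications reach the cluster through the extraDNS name; roughly
// (a sketch - exact users, databases and client certificates are
// handled by the cockroachdb library):
//
//   postgresql://sso@crdb-waw1.hswaw.net:26257/sso?sslmode=verify-full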
},
ceph: {
// waw1 cluster - dead as of 2019/08/06, data corruption
// waw2 cluster - dead as of 2021/01/22, torn down (horrible M610 RAID controllers are horrible)
// waw3: 6TB SAS 3.5" HDDs
waw3: rook.Cluster(k0.cluster.rook, "ceph-waw3") {
spec: {
mon: {
count: 1,
allowMultiplePerNode: false,
},
resources: {
osd: {
requests: {
cpu: "2",
memory: "6G",
},
limits: {
cpu: "2",
memory: "8G",
},
},
},
storage: {
useAllNodes: false,
useAllDevices: false,
config: {
databaseSizeMB: "1024",
journalSizeMB: "1024",
},
nodes: [
{
name: "dcr01s22.hswaw.net",
location: "rack=dcr01 host=dcr01s22",
devices: [
// https://github.com/rook/rook/issues/1228
//{ name: "disk/by-id/wwan-0x" + wwan }
//for wwan in [
// "5000c5008508c433",
// "5000c500850989cf",
// "5000c5008508f843",
// "5000c5008508baf7",
//]
{ name: "sdn" },
{ name: "sda" },
{ name: "sdb" },
{ name: "sdc" },
],
},
{
name: "dcr01s24.hswaw.net",
location: "rack=dcr01 host=dcr01s24",
devices: [
// https://github.com/rook/rook/issues/1228
//{ name: "disk/by-id/wwan-0x" + wwan }
//for wwan in [
// "5000c5008508ee03",
// "5000c5008508c9ef",
// "5000c5008508df33",
// "5000c5008508dd3b",
//]
{ name: "sdm" },
{ name: "sda" },
{ name: "sdb" },
{ name: "sdc" },
],
},
],
},
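// Benji (Ceph RBD backup) configuration, presumably consumed by
// rook.Cluster from lib/rook.libsonnet: the pools listed below are
// backed up to the Wasabi S3 bucket configured here, encrypted with
// the password kept in //cluster/secrets.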
benji:: {
metadataStorageClass: "waw-hdd-redundant-3",
encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
pools: [
"waw-hdd-redundant-3",
"waw-hdd-redundant-3-metadata",
"waw-hdd-yolo-3",
],
s3Configuration: {
awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
bucketName: "benji-k0-backups-waw3",
endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
},
}
},
},
waw3Pools: {
// redundant block storage
blockRedundant: rook.ECBlockPool(k0.ceph.waw3, "waw-hdd-redundant-3") {
metadataReplicas: 2,
spec: {
failureDomain: "host",
replicated: {
size: 2,
},
},
},
// yolo block storage (low usage, no host redundancy)
blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw3, "waw-hdd-yolo-3") {
spec: {
failureDomain: "osd",
erasureCoded: {
dataChunks: 2,
codingChunks: 1,
},
},
},
// q3k's personal pool, used from outside the cluster.
q3kRedundant: rook.ECBlockPool(k0.ceph.waw3, "waw-hdd-redundant-q3k-3") {
metadataReplicas: 2,
spec: {
failureDomain: "host",
replicated: {
size: 2,
},
},
},
objectRedundant: rook.S3ObjectStore(k0.ceph.waw3, "waw-hdd-redundant-3-object") {
spec: {
metadataPool: {
failureDomain: "host",
replicated: { size: 2 },
},
dataPool: {
failureDomain: "host",
replicated: { size: 2 },
},
},
},
},
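// Workloads consume blockRedundant through its StorageClass (this is
// what k0.cluster.cfg.storageClassNameParanoid above points at).
// Illustrative PVC sketch, not part of this configuration:
//
//   kube.PersistentVolumeClaim("data") {
//       metadata+: { namespace: "example-namespace" },
//       spec+: {
//           storageClassName: "waw-hdd-redundant-3",
//           accessModes: ["ReadWriteOnce"],
//           resources: { requests: { storage: "10Gi" } },
//       },
//   },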
// Clients for S3/radosgw storage.
clients: {
# Used for owncloud.hackerspace.pl, which for now lives on boston-packets.hackerspace.pl.
nextcloudWaw3: kube.CephObjectStoreUser("nextcloud") {
metadata+: {
namespace: "ceph-waw3",
},
spec: {
store: "waw-hdd-redundant-3-object",
displayName: "nextcloud",
},
},
# issues.hackerspace.pl (redmine) attachments bucket
issuesWaw3: kube.CephObjectStoreUser("issues") {
metadata+: {
namespace: "ceph-waw3",
},
spec: {
store: "waw-hdd-redundant-3-object",
displayName: "issues",
},
},
# nuke@hackerspace.pl's personal storage.
nukePersonalWaw3: kube.CephObjectStoreUser("nuke-personal") {
metadata+: {
namespace: "ceph-waw3",
},
spec: {
store: "waw-hdd-redundant-3-object",
displayName: "nuke-personal",
},
},
# patryk@hackerspace.pl's ArmA3 mod bucket.
cz2ArmaModsWaw3: kube.CephObjectStoreUser("cz2-arma3mods") {
metadata+: {
namespace: "ceph-waw3",
},
spec: {
store: "waw-hdd-redundant-3-object",
displayName: "cz2-arma3mods",
},
},
# Buckets for spark pipelines
# TODO(implr): consider a second yolo-backed one for temp data
implrSparkWaw3: kube.CephObjectStoreUser("implr-spark") {
metadata+: {
namespace: "ceph-waw3",
},
spec: {
store: "waw-hdd-redundant-3-object",
displayName: "implr-spark",
},
},
# q3k's personal user
q3kWaw3: kube.CephObjectStoreUser("q3k") {
metadata+: {
namespace: "ceph-waw3",
},
spec: {
store: "waw-hdd-redundant-3-object",
displayName: "q3k",
},
},
# woju's personal user
wojuWaw3: kube.CephObjectStoreUser("woju") {
metadata+: {
namespace: "ceph-waw3",
},
spec: {
store: "waw-hdd-redundant-3-object",
displayName: "woju",
},
},
# cz3's (patryk@hackerspace.pl) personal user
cz3Waw3: kube.CephObjectStoreUser("cz3") {
metadata+: {
namespace: "ceph-waw3",
},
spec: {
store: "waw-hdd-redundant-3-object",
displayName: "cz3",
},
},
},
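// For each CephObjectStoreUser above, Rook generates S3 credentials in
// a Secret in the ceph-waw3 namespace (conventionally named
// rook-ceph-object-user-<store>-<user>); applications then use those
// keys against the radosgw endpoint of waw-hdd-redundant-3-object.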
},
# These are policies allowing for Insecure pods in some namespaces.
# A lot of them are spurious and come from the fact that we deployed
# these namespaces before we deployed the draconian PodSecurityPolicy
# we have now. This should be fixed by setting up some more granular
# policies, or fixing the workloads to not need some of the permission
# bits they use, whatever those might be.
# TODO(q3k): fix this?
unnecessarilyInsecureNamespaces: [
policies.AllowNamespaceInsecure("ceph-waw3"),
policies.AllowNamespaceInsecure("matrix"),
policies.AllowNamespaceInsecure("registry"),
policies.AllowNamespaceInsecure("internet"),
# TODO(implr): restricted policy with CAP_NET_ADMIN and tuntap, but no full root
policies.AllowNamespaceInsecure("implr-vpn"),
],
# Admission controller that permits non-privileged users to manage
# their namespaces without danger of hijacking important URLs.
admitomatic: admitomatic.Environment {
cfg+: {
proto: {
// Domains allowed in given namespaces. If a domain exists
// anywhere, ingresses will only be permitted to be created
// within namespaces in which it appears here. This works
// the same way for wildcards: if a wildcard exists in this
// list, it blocks all unauthorized uses of that domain
// elsewhere.
//
// See //cluster/admitomatic for more information.
//
// Or, tl;dr:
//
// If you do a wildcard CNAME onto the k0 ingress, you
// should explicitly state *.your.name.com here.
//
// If you just want to protect your host from being
// hijacked by other cluster users, you should also state
// it here (either as a wildcard, or as individual exact domains).
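//
// For example, given the entries below, an Ingress claiming
// gerrit.hackerspace.pl is only admitted in the "gerrit" namespace;
// the same host requested from any other namespace is rejected.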
allow_domain: [
{ namespace: "covid-formity", dns: "covid19.hackerspace.pl" },
{ namespace: "covid-formity", dns: "covid.hackerspace.pl" },
{ namespace: "covid-formity", dns: "www.covid.hackerspace.pl" },
{ namespace: "devtools-prod", dns: "hackdoc.hackerspace.pl" },
{ namespace: "devtools-prod", dns: "cs.hackerspace.pl" },
{ namespace: "engelsystem-prod", dns: "engelsystem.hackerspace.pl" },
{ namespace: "gerrit", dns: "gerrit.hackerspace.pl" },
{ namespace: "gitea-prod", dns: "gitea.hackerspace.pl" },
{ namespace: "hswaw-prod", dns: "*.hackerspace.pl" },
{ namespace: "internet", dns: "internet.hackerspace.pl" },
{ namespace: "matrix", dns: "matrix.hackerspace.pl" },
{ namespace: "onlyoffice-prod", dns: "office.hackerspace.pl" },
{ namespace: "redmine", dns: "issues.hackerspace.pl" },
{ namespace: "redmine", dns: "b.hackerspace.pl" },
{ namespace: "redmine", dns: "b.hswaw.net" },
{ namespace: "redmine", dns: "xn--137h.hackerspace.pl" },
{ namespace: "redmine", dns: "xn--137h.hswaw.net" },
{ namespace: "speedtest", dns: "speedtest.hackerspace.pl" },
{ namespace: "sso", dns: "sso.hackerspace.pl" },
{ namespace: "ceph-waw3", dns: "ceph-waw3.hswaw.net" },
{ namespace: "ceph-waw3", dns: "object.ceph-waw3.hswaw.net" },
{ namespace: "monitoring-global-k0", dns: "*.hswaw.net" },
{ namespace: "registry", dns: "*.hswaw.net" },
// q3k's legacy namespace (pre-prodvider)
{ namespace: "q3k", dns: "*.q3k.org" },
{ namespace: "personal-q3k", dns: "*.q3k.org" },
],
},
},
},
},
}