# hscloud/cluster/kube/lib/calico.libsonnet
# Deploy hosted calico with its own etcd.
local kube = import "../../../kube/kube.libsonnet";
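# Bind a ServiceAccount to a ClusterRole, naming the ClusterRoleBinding after the role.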
local bindServiceAccountClusterRole(sa, cr) = kube.ClusterRoleBinding(cr.metadata.name) {
roleRef: {
apiGroup: "rbac.authorization.k8s.io",
kind: "ClusterRole",
name: cr.metadata.name,
},
subjects: [
{
kind: "ServiceAccount",
name: sa.metadata.name,
namespace: sa.metadata.namespace,
},
],
};
{
Environment: {
local env = self,
local cfg = env.cfg,
cfg:: {
namespace: "kube-system",
version: "v3.14.0",
imageController: "calico/kube-controllers:" + cfg.version,
imageCNI: "calico/cni:" + cfg.version,
imageNode: "calico/node:" + cfg.version,
// TODO(q3k): Separate etcd for calico
etcd: {
endpoints: ["https://bc01n%02d.hswaw.net:2379" % n for n in std.range(1, 3)],
ca: importstr "../../certs/ca-etcd.crt",
cert: importstr "../../certs/etcd-calico.cert",
key: importstr "../../secrets/plain/etcd-calico.key",
},
},
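# Shared configuration consumed by calico-node, the CNI install script and
# kube-controllers. The etcd TLS material is referenced by path under
# secretPrefix, where the calico-secrets Secret (below) is mounted.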
cm: kube.ConfigMap("calico-config") {
local cm = self,
secretPrefix:: "/calico-secrets/",
metadata+: {
namespace: cfg.namespace,
},
data: {
etcd_endpoints: std.join(",", cfg.etcd.endpoints),
etcd_ca: cm.secretPrefix + "etcd-ca",
etcd_cert: cm.secretPrefix + "etcd-cert",
etcd_key: cm.secretPrefix + "etcd-key",
calico_backend: "bird",
veth_mtu: "1440",
typha_service_name: "none",
cni_network_config: |||
{
"name": "k8s-pod-network",
"cniVersion": "0.3.1",
"plugins": [
{
"type": "calico",
"log_level": "info",
"etcd_endpoints": "__ETCD_ENDPOINTS__",
"etcd_key_file": "__ETCD_KEY_FILE__",
"etcd_cert_file": "__ETCD_CERT_FILE__",
"etcd_ca_cert_file": "__ETCD_CA_CERT_FILE__",
"datastore_type": "etcdv3",
"mtu": __CNI_MTU__,
"ipam": {
"type": "calico-ipam"
},
"policy": {
"type": "k8s"
},
"kubernetes": {
"kubeconfig": "__KUBECONFIG_FILEPATH__"
}
},
{
"type": "portmap",
"snat": true,
"capabilities": {"portMappings": true}
},
{
"type": "bandwidth",
"capabilities": {"bandwidth": true}
}
]
}
|||
},
},
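# etcd client TLS material, mounted into the Calico containers at secretPrefix.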
secrets: kube.Secret("calico-secrets") {
metadata+: {
namespace: cfg.namespace,
},
data_: {
"etcd-ca": cfg.etcd.ca,
"etcd-cert": cfg.etcd.cert,
"etcd-key": cfg.etcd.key,
},
},
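# ServiceAccount and RBAC for calico-node: read access to pods, nodes, namespaces,
# services and NetworkPolicies, plus permission to patch node and pod status.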
saNode: kube.ServiceAccount("calico-node") {
metadata+: {
namespace: cfg.namespace,
},
},
crNode: kube.ClusterRole("calico-node") {
rules: [
{
apiGroups: [""],
resources: ["pods", "nodes", "namespaces"],
verbs: ["get"],
},
{
apiGroups: [""],
resources: ["endpoints", "services"],
verbs: ["watch", "list", "get"],
},
{
apiGroups: [""],
resources: ["configmaps"],
verbs: ["get"],
},
{
apiGroups: [""],
resources: ["nodes/status"],
verbs: ["patch", "update"],
},
{
apiGroups: ["networking.k8s.io"],
resources: ["networkpolicies"],
verbs: ["watch", "list"],
},
{
apiGroups: [""],
resources: ["pods", "namespaces", "serviceaccounts"],
verbs: ["watch", "list"],
},
{
apiGroups: [""],
resources: ["pods/status"],
verbs: ["patch"],
},
{
apiGroups: [""],
resources: ["nodes"],
verbs: ["get", "list", "watch"],
},
],
},
crbNode: bindServiceAccountClusterRole(env.saNode, env.crNode),
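# ServiceAccount and RBAC for calico-kube-controllers: read-only access to the
# Kubernetes resources it mirrors into the Calico etcd datastore.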
saController: kube.ServiceAccount("calico-kube-controllers") {
metadata+: {
namespace: cfg.namespace,
},
},
crController: kube.ClusterRole("calico-kube-controllers") {
rules: [
{
apiGroups: [""],
resources: ["nodes", "pods", "namespaces", "serviceaccounts"],
verbs: ["watch", "list", "get"],
},
{
apiGroups: ["networking.k8s.io"],
resources: ["networkpolicies"],
verbs: ["watch", "list"],
},
],
},
crbController: bindServiceAccountClusterRole(env.saController, env.crController),
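# calico-kube-controllers: single-replica Deployment that watches the Kubernetes
# API and syncs policies, namespaces, service accounts, workload endpoints and
# nodes into the Calico etcd datastore (see ENABLED_CONTROLLERS below).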
controller: kube.Deployment("calico-kube-controllers") {
metadata+: {
namespace: cfg.namespace,
annotations+: {
"scheduler.alpha.kubernetes.io/critical-pod": "",
},
},
spec+: {
replicas: 1,
strategy: { type: "Recreate" },
template+: {
spec+: {
hostNetwork: true,
tolerations: [
{ key: "CriticalAddonsOnly", operator: "Exists" },
],
serviceAccountName: env.saController.metadata.name,
volumes_: {
secrets: kube.SecretVolume(env.secrets),
},
containers_: {
"calico-kube-controllers": kube.Container("calico-kube-controllers") {
image: cfg.imageController,
env_: {
ETCD_ENDPOINTS: kube.ConfigMapRef(env.cm, "etcd_endpoints"),
ETCD_CA_CERT_FILE: kube.ConfigMapRef(env.cm, "etcd_ca"),
ETCD_KEY_FILE: kube.ConfigMapRef(env.cm, "etcd_key"),
ETCD_CERT_FILE: kube.ConfigMapRef(env.cm, "etcd_cert"),
ENABLED_CONTROLLERS: "policy,namespace,serviceaccount,workloadendpoint,node",
},
volumeMounts_: {
secrets: {
mountPath: env.cm.secretPrefix,
},
},
readinessProbe: {
exec: {
command: [ "/usr/bin/check-status", "-r" ],
},
},
},
},
},
},
},
},
# metallb is passed through Calico: each node's BIRD instance peers with the
# top-of-rack switch (dcsw01) over BGP, and metallb peers locally with Calico
# over 127.0.0.1 (both run with host networking). Calico's bird.cfg.template is
# overridden to treat any 127.0.0.1 peer as passive, and bird_ipam.cfg.template
# is overridden to stop programming unreachable routes into the kernel: routes
# learned from metallb have a 127.0.0.1 next-hop, which BIRD marks unreachable,
# and unreachable kernel routes would break local access to ExternalIPs (e.g.
# registry access from containerd). Calico also has to be made aware of the
# metallb pools, otherwise it filters the routes metallb announces.
# ConfigMap that holds the overridden bird.cfg.template and bird_ipam.cfg.template.
calicoMetallbBird: kube.ConfigMap("calico-metallb-bird") {
metadata+: {
namespace: cfg.namespace,
},
data: {
"bird.cfg.template": (importstr "calico-bird.cfg.template"),
"bird_ipam.cfg.template": (importstr "calico-bird-ipam.cfg.template"),
},
},
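# calico-node DaemonSet: runs Felix, BIRD and confd on every node (host network,
# privileged), with an init container that installs the CNI plugin and config.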
nodeDaemon: kube.DaemonSet("calico-node") {
metadata+: {
namespace: cfg.namespace,
},
spec+: {
template+: {
metadata+: {
annotations+: {
"scheduler.alpha.kubernetes.io/critical-pod": "",
},
},
spec+: {
hostNetwork: true,
tolerations: [
{ key: "CriticalAddonsOnly", operator: "Exists" },
{ effect: "NoExecute", operator: "Exists" },
{ effect: "NoSchedule", operator: "Exists" },
],
serviceAccountName: env.saNode.metadata.name,
terminationGracePeriodSeconds: 0,
volumes_: {
cni_bin: kube.HostPathVolume("/opt/cni/bin"),
cni_config: kube.HostPathVolume("/opt/cni/conf"),
secrets: kube.SecretVolume(env.secrets),
lib_modules: kube.HostPathVolume("/run/current-system/kernel-modules/lib/modules"),
xtables_lock: kube.HostPathVolume("/run/xtables.lock"),
var_run_calico: kube.HostPathVolume("/var/run/calico"),
var_lib_calico: kube.HostPathVolume("/var/lib/calico"),
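# Overridden BIRD templates (see calicoMetallbBird above).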
bird_cfg_template: kube.ConfigMapVolume(env.calicoMetallbBird),
},
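# install-cni copies the CNI plugin binaries onto the host and renders
# cni_network_config into 10-calico.conflist in the host CNI config directory.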
initContainers_: {
installCNI: kube.Container("install-cni") {
image: cfg.imageCNI,
command: ["/install-cni.sh"],
env_: {
ETCD_ENDPOINTS: kube.ConfigMapRef(env.cm, "etcd_endpoints"),
CNI_CONF_NAME: "10-calico.conflist",
CNI_NETWORK_CONFIG: kube.ConfigMapRef(env.cm, "cni_network_config"),
CNI_CONF_ETCD_CA_CERT_FILE: kube.ConfigMapRef(env.cm, "etcd_ca"),
CNI_CONF_ETCD_KEY_FILE: kube.ConfigMapRef(env.cm, "etcd_key"),
CNI_CONF_ETCD_CERT_FILE: kube.ConfigMapRef(env.cm, "etcd_cert"),
CNI_MTU: kube.ConfigMapRef(env.cm, "veth_mtu"),
CNI_NET_DIR: "/opt/cni/conf",
SLEEP: "false",
KUBERNETES_NODE_NAME: { fieldRef: { fieldPath: "spec.nodeName" } },
},
volumeMounts_: {
cni_bin: { mountPath: "/host/opt/cni/bin" },
cni_config: { mountPath: "/host/etc/cni/net.d" },
secrets: { mountPath: env.cm.secretPrefix },
},
},
},
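# The main calico-node container: Felix programs routes and iptables; BIRD
# speaks BGP to the fabric and, via the overridden templates, passively to the
# local metallb speaker.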
containers_: {
calicoNode: kube.Container("calico-node") {
image: cfg.imageNode,
env_: {
DATASTORE_TYPE: "etcdv3",
ETCD_ENDPOINTS: kube.ConfigMapRef(env.cm, "etcd_endpoints"),
ETCD_CA_CERT_FILE: kube.ConfigMapRef(env.cm, "etcd_ca"),
ETCD_KEY_FILE: kube.ConfigMapRef(env.cm, "etcd_key"),
ETCD_CERT_FILE: kube.ConfigMapRef(env.cm, "etcd_cert"),
CALICO_K8S_NODE_REF: kube.FieldRef("spec.nodeName"),
CALICO_NETWORKING_BACKEND: kube.ConfigMapRef(env.cm, "calico_backend"),
CLUSTER_TYPE: "k8s,bgp",
IP: "autodetect",
IP_AUTODETECTION_METHOD: "can-reach=185.236.240.1",
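# Pod network: IPIP-encapsulate all inter-node pod traffic; the default IPv4
# pool below is created on first start and should match the cluster's pod CIDR.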
CALICO_IPV4POOL_IPIP: "Always",
FELIX_IPINIPMTU: kube.ConfigMapRef(env.cm, "veth_mtu"),
CALICO_IPV4POOL_CIDR: "10.10.24.0/21",
CALICO_DISABLE_FILE_LOGGING: "true",
FELIX_DEFAULTENDPOINTTOHOSTACTION: "ACCEPT",
FELIX_IPV6SUPPORT: "false",
FELIX_LOGSEVERITYSCREEN: "info",
FELIX_HEALTHENABLED: "true",
FELIX_HEALTHHOST: "127.0.0.1",
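# Advertise the service ClusterIP range over BGP so the L3 fabric can reach
# ClusterIPs directly.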
CALICO_ADVERTISE_CLUSTER_IPS: "10.10.12.0/24",
KUBERNETES_NODE_NAME: { fieldRef: { fieldPath: "spec.nodeName" } },
},
securityContext: {
privileged: true,
},
resources: {
requests: { cpu: "250m" },
},
livenessProbe: {
exec: {
command: ["/bin/calico-node", "-bird-live", "-felix-live"],
},
periodSeconds: 10,
initialDelaySeconds: 10,
failureThreshold: 6,
},
readinessProbe: {
exec: {
command: ["/bin/calico-node", "-bird-ready", "-felix-ready"],
},
periodSeconds: 10,
},
volumeMounts_: {
lib_modules: { mountPath: "/lib/modules" },
xtables_lock: { mountPath: "/run/xtables.lock" },
var_run_calico: { mountPath: "/var/run/calico" },
var_lib_calico: { mountPath: "/var/lib/calico" },
secrets: { mountPath: env.cm.secretPrefix },
},
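# Mount the overridden BIRD templates over the stock confd templates.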
volumeMounts+: [
{ name: "bird-cfg-template",
mountPath: "/etc/calico/confd/templates/bird.cfg.template",
subPath: "bird.cfg.template"
},
{ name: "bird-cfg-template",
mountPath: "/etc/calico/confd/templates/bird_ipam.cfg.template",
subPath: "bird_ipam.cfg.template"
},
],
},
},
},
},
},
},
},
}
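# A minimal usage sketch (hypothetical caller; the import path and overrides are
# assumptions, adjust to the cluster definition that consumes this library):
#
#   local calico = import "lib/calico.libsonnet";
#   {
#     calico: calico.Environment {
#       cfg+: { namespace: "kube-system" },
#     },
#   }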