Merge "k0.hswaw.net: pass metallb through Calico"

master
q3k 2020-10-02 22:54:57 +00:00 committed by Gerrit Code Review
commit 44628f2b9e
5 changed files with 334 additions and 2 deletions

k0.calico.yaml

@@ -0,0 +1,78 @@
# This is the current Calico configuration in k0.hswaw.net.
# Unfortunately, we do not have Calico configured to use CRDs; instead, it
# keeps its resources separately from Kubernetes. Thus, this configuration
# cannot be managed by Kubernetes/jsonnet. Instead, it must be applied manually:
#
# calicoctl apply -f k0.calico.yaml
apiVersion: projectcalico.org/v3
kind: BGPConfiguration
metadata:
name: default
spec:
logSeverityScreen: Info
nodeToNodeMeshEnabled: true
asNumber: 65003
---
# metallb peer, must be compatible with the metallb definition in k0.libsonnet.
apiVersion: projectcalico.org/v3
kind: BGPPeer
metadata:
name: metallb
spec:
peerIP: 127.0.0.1
asNumber: 65002
---
# ToR switch peering, must be compatible with the configuration on dcsw01.hswaw.net.
apiVersion: projectcalico.org/v3
kind: BGPPeer
metadata:
name: dcsw01
spec:
peerIP: 185.236.240.33
asNumber: 65001
---
# IP pool that's used by metallb. We mark it as disabled so that Calico doesn't
# allocate pod IPs from it, while still allowing metallb routes from that pool
# to pass through eBGP (otherwise the Calico BIRD filter would reject them).
# Keep in sync with k0.libsonnet.
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
name: public-v4-1
spec:
cidr: 185.236.240.48/28
disabled: true
---
# IP pool that's used by metallb. We mark it as disabled so that Calico doesn't
# allocate pod IPs from it, while still allowing metallb routes from that pool
# to pass through eBGP (otherwise the Calico BIRD filter would reject them).
# Keep in sync with k0.libsonnet.
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
name: public-v4-2
spec:
cidr: 185.236.240.112/28
disabled: true
---
# IP pool for the service network.
apiVersion: projectcalico.org/v3
kind: IPPool
metadata:
name: default-ipv4-ippool
spec:
blockSize: 26
cidr: 10.10.24.0/21
ipipMode: CrossSubnet
natOutgoing: true
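
With the datastore kept outside of Kubernetes, a quick way to confirm the applied state is calicoctl itself. A hedged sketch: calicoctl has to be pointed at the same etcd datastore Calico uses, and node status must run as root on one of the k0 nodes:

    calicoctl apply -f k0.calico.yaml
    calicoctl get bgpConfiguration
    calicoctl get bgpPeer
    calicoctl get ipPool -o wide
    calicoctl node status    # should list established sessions to 127.0.0.1 and 185.236.240.33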

k0.libsonnet

@@ -20,13 +20,15 @@ local rook = import "lib/rook.libsonnet";
         },
         metallb+: {
             cfg+: {
+                // Peer with Calico running on the same node.
                 peers: [
                     {
-                        "peer-address": "185.236.240.33",
-                        "peer-asn": 65001,
+                        "peer-address": "127.0.0.1",
+                        "peer-asn": 65003,
                         "my-asn": 65002,
                     },
                 ],
+                // Public IP address pools. Keep in sync with k0.calico.yaml.
                 addressPools: [
                     {
                         name: "public-v4-1",

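The net effect on metallb: instead of peering directly with dcsw01 (AS 65001), each speaker now talks to the Calico BIRD instance on its own node (AS 65003) over localhost, and Calico re-exports the service routes to the ToR. In metallb's upstream ConfigMap schema, the stanza above should render to roughly the following (a hand-written sketch; protocol: bgp and the pool CIDR are carried over from k0.calico.yaml, and only the first of the two public pools is shown):

    peers:
    - peer-address: "127.0.0.1"
      peer-asn: 65003
      my-asn: 65002
    address-pools:
    - name: public-v4-1
      protocol: bgp
      addresses:
      - 185.236.240.48/28
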
calico-bird-ipam.cfg.template

@@ -0,0 +1,66 @@
# This is forked from bird_ipam.cfg.template from calico running on k0.hswaw.net on 2020/09/21.
# Changed vs. upstream (C-f HSCLOUD):
# - do not program RTD_UNREACHABLE routes into the kernel (these come from metallb, and
# programming them seems to break things)
# Generated by confd
filter calico_export_to_bgp_peers {
calico_aggr();
{{- $static_key := "/staticroutes"}}
{{- if ls $static_key}}
# Export static routes.
{{- range ls $static_key}}
{{- $parts := split . "-"}}
{{- $cidr := join $parts "/"}}
if ( net ~ {{$cidr}} ) then { accept; }
{{- end}}
{{- end}}
{{range ls "/v1/ipam/v4/pool"}}{{$data := json (getv (printf "/v1/ipam/v4/pool/%s" .))}}
if ( net ~ {{$data.cidr}} ) then {
accept;
}
{{- end}}
reject;
}
{{$network_key := printf "/bgp/v1/host/%s/network_v4" (getenv "NODENAME")}}
filter calico_kernel_programming {
{{- $reject_key := "/rejectcidrs"}}
{{- if ls $reject_key}}
if ( dest = RTD_UNREACHABLE ) then { # HSCLOUD
reject;
}
# Don't program static routes into kernel.
{{- range ls $reject_key}}
{{- $parts := split . "-"}}
{{- $cidr := join $parts "/"}}
if ( net ~ {{$cidr}} ) then { reject; }
{{- end}}
{{- end}}
{{- if exists $network_key}}{{$network := getv $network_key}}
{{range ls "/v1/ipam/v4/pool"}}{{$data := json (getv (printf "/v1/ipam/v4/pool/%s" .))}}
if ( net ~ {{$data.cidr}} ) then {
{{- if $data.vxlan_mode}}
# Don't program VXLAN routes into the kernel - these are handled by Felix.
reject;
}
{{- else if $data.ipip_mode}}{{if eq $data.ipip_mode "cross-subnet"}}
if defined(bgp_next_hop) && ( bgp_next_hop ~ {{$network}} ) then
krt_tunnel = ""; {{- /* Destination in ipPool, mode is cross sub-net, route from-host on subnet, do not use IPIP */}}
else
krt_tunnel = "{{$data.ipip}}"; {{- /* Destination in ipPool, mode is cross sub-net, route from-host off subnet, set the tunnel (if IPIP not enabled, value will be "") */}}
accept;
} {{- else}}
krt_tunnel = "{{$data.ipip}}"; {{- /* Destination in ipPool, mode not cross sub-net, set the tunnel (if IPIP not enabled, value will be "") */}}
accept;
} {{- end}} {{- else}}
krt_tunnel = "{{$data.ipip}}"; {{- /* Destination in ipPool, mode field is not present, set the tunnel (if IPIP not enabled, value will be "") */}}
accept;
} {{- end}}
{{end}}
{{- end}}{{/* End of 'exists $network_key' */}}
accept; {{- /* Destination is not in any ipPool, accept */}}
}
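
The only functional change vs. upstream is the RTD_UNREACHABLE guard at the top of calico_kernel_programming: routes learned from metallb carry an unreachable destination, and programming them into the kernel breaks things, so they are rejected here while calico_export_to_bgp_peers still passes them on to eBGP peers (the disabled public-v4-* pools are what makes them match that filter). On a node where /rejectcidrs is non-empty, the filter should render to roughly this (a hand-rendered sketch, not actual confd output; the reject CIDR is hypothetical and the per-pool clauses are elided):

    filter calico_kernel_programming {
      if ( dest = RTD_UNREACHABLE ) then { # HSCLOUD
        reject;
      }
      # Don't program static routes into kernel.
      if ( net ~ 10.10.12.0/24 ) then { reject; }  # hypothetical CIDR from /rejectcidrs
      # ... per-pool IPIP/VXLAN handling as generated upstream ...
      accept;
    }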

calico-bird.cfg.template

@@ -0,0 +1,164 @@
# This is forked from bird.cfg.template from calico running on k0.hswaw.net on 2020/09/21.
# Changed vs. upstream (C-f HSCLOUD):
# - set 'passive on' on 127.0.0.1 neighbors, used for establishing connectivity
# with metallb.
# Generated by confd
include "bird_aggr.cfg";
include "bird_ipam.cfg";
{{- $node_ip_key := printf "/host/%s/ip_addr_v4" (getenv "NODENAME")}}{{$node_ip := getv $node_ip_key}}
{{- $router_id := getenv "CALICO_ROUTER_ID" ""}}
{{- $node_name := getenv "NODENAME"}}
router id {{if eq "hash" ($router_id) -}}
{{hashToIPv4 $node_name}};
{{- else -}}
{{if ne "" ($router_id)}}{{$router_id}}{{else}}{{$node_ip}}{{end}};
{{- end}}
{{- define "LOGGING"}}
{{- $node_logging_key := printf "/host/%s/loglevel" (getenv "NODENAME")}}
{{- if exists $node_logging_key}}
{{- $logging := getv $node_logging_key}}
{{- if eq $logging "debug"}}
debug all;
{{- else if ne $logging "none"}}
debug { states };
{{- end}}
{{- else if exists "/global/loglevel"}}
{{- $logging := getv "/global/loglevel"}}
{{- if eq $logging "debug"}}
debug all;
{{- else if ne $logging "none"}}
debug { states };
{{- end}}
{{- else}}
debug { states };
{{- end}}
{{- end}}
# Configure synchronization between routing tables and kernel.
protocol kernel {
learn; # Learn all alien routes from the kernel
persist; # Don't remove routes on bird shutdown
scan time 2; # Scan kernel routing table every 2 seconds
import all;
export filter calico_kernel_programming; # Default is export none
graceful restart; # Turn on graceful restart to reduce potential flaps in
# routes when reloading BIRD configuration. With a full
# automatic mesh, there is no way to prevent BGP from
# flapping since multiple nodes update their BGP
# configuration at the same time; GR is not guaranteed to
# work correctly in this scenario.
}
# Watch interface up/down events.
protocol device {
{{- template "LOGGING"}}
scan time 2; # Scan interfaces every 2 seconds
}
protocol direct {
{{- template "LOGGING"}}
interface -"cali*", -"kube-ipvs*", "*"; # Exclude cali* and kube-ipvs* but
# include everything else. In
# IPVS-mode, kube-proxy creates a
# kube-ipvs0 interface. We exclude
# kube-ipvs0 because this interface
# gets an address for every in use
# cluster IP. We use static routes
# for when we legitimately want to
# export cluster IPs.
}
{{if eq "" ($node_ip)}}# IPv4 disabled on this node.
{{else}}{{$node_as_key := printf "/host/%s/as_num" (getenv "NODENAME")}}
# Template for all BGP clients
template bgp bgp_template {
{{- $as_key := or (and (exists $node_as_key) $node_as_key) "/global/as_num"}}
{{- $node_as_num := getv $as_key}}
{{- template "LOGGING"}}
description "Connection to BGP peer";
local as {{$node_as_num}};
multihop;
gateway recursive; # This should be the default, but just in case.
import all; # Import all routes, since we don't know what the upstream
# topology is and therefore have to trust the ToR/RR.
export filter calico_export_to_bgp_peers; # Only want to export routes for workloads.
source address {{$node_ip}}; # The local address we use for the TCP connection
add paths on;
graceful restart; # See comment in kernel section about graceful restart.
connect delay time 2;
connect retry time 5;
error wait time 5,30;
}
# ------------- Node-to-node mesh -------------
{{- $node_cid_key := printf "/host/%s/rr_cluster_id" (getenv "NODENAME")}}
{{- $node_cluster_id := getv $node_cid_key}}
{{if (json (getv "/global/node_mesh")).enabled}}
{{range $host := lsdir "/host"}}
{{$onode_as_key := printf "/host/%s/as_num" .}}
{{$onode_ip_key := printf "/host/%s/ip_addr_v4" .}}{{if exists $onode_ip_key}}{{$onode_ip := getv $onode_ip_key}}
{{$nums := split $onode_ip "."}}{{$id := join $nums "_"}}
# For peer {{$onode_ip_key}}
{{if eq $onode_ip ($node_ip) }}# Skipping ourselves ({{$node_ip}})
{{else if ne "" $onode_ip}}protocol bgp Mesh_{{$id}} from bgp_template {
neighbor {{$onode_ip}} as {{if exists $onode_as_key}}{{getv $onode_as_key}}{{else}}{{getv "/global/as_num"}}{{end}};
{{- /*
Make the peering unidirectional. This avoids a race where
- peer A opens a connection and begins a graceful restart
- before the restart completes, peer B opens its connection
- peer A sees the new connection and aborts the graceful restart, causing a route flap.
*/ -}}
{{if gt $onode_ip $node_ip}}
passive on; # Mesh is unidirectional, peer will connect to us.
{{- end}}
}{{end}}{{end}}{{end}}
{{else}}
# Node-to-node mesh disabled
{{end}}
# ------------- Global peers -------------
{{if ls "/global/peer_v4"}}
{{range gets "/global/peer_v4/*"}}{{$data := json .Value}}
{{$nums := split $data.ip "."}}{{$id := join $nums "_"}}
# For peer {{.Key}}
{{- if eq $data.ip ($node_ip) }}
# Skipping ourselves ({{$node_ip}})
{{- else}}
protocol bgp Global_{{$id}} from bgp_template {
{{if eq $data.ip ("127.0.0.1")}}passive on; # HSCLOUD {{end}}
neighbor {{$data.ip}} as {{$data.as_num}};
{{- if and (eq $data.as_num $node_as_num) (ne "" ($node_cluster_id)) (ne $data.rr_cluster_id ($node_cluster_id))}}
rr client;
rr cluster id {{$node_cluster_id}};
{{- end}}
}
{{- end}}
{{end}}
{{else}}# No global peers configured.{{end}}
# ------------- Node-specific peers -------------
{{$node_peers_key := printf "/host/%s/peer_v4" (getenv "NODENAME")}}
{{if ls $node_peers_key}}
{{range gets (printf "%s/*" $node_peers_key)}}{{$data := json .Value}}
{{$nums := split $data.ip "."}}{{$id := join $nums "_"}}
# For peer {{.Key}}
{{- if eq $data.ip ($node_ip) }}
# Skipping ourselves ({{$node_ip}})
{{- else}}
protocol bgp Node_{{$id}} from bgp_template {
neighbor {{$data.ip}} as {{$data.as_num}};
{{- if and (eq $data.as_num $node_as_num) (ne "" ($node_cluster_id)) (ne $data.rr_cluster_id ($node_cluster_id))}}
rr client;
rr cluster id {{$node_cluster_id}};
{{- end}}
}
{{- end}}
{{end}}
{{else}}# No node-specific peers configured.{{end}}
{{end}}{{/* End of IPv4 enable check */}}
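
Given the BGPPeer resources in k0.calico.yaml, the behavioural change lands in the global-peers section: hand-rendering the template for the metallb peer yields roughly this (a sketch, not actual confd output; the local AS 65003 comes from the BGPConfiguration, and the key path shown is approximate):

    # For peer /global/peer_v4/127.0.0.1
    protocol bgp Global_127_0_0_1 from bgp_template {
      passive on; # HSCLOUD
      neighbor 127.0.0.1 as 65002;
    }

passive on matters here presumably because an active connect to 127.0.0.1:179 would only reach BIRD's own listening socket; metallb's speaker always initiates the session itself, so BIRD merely needs to accept it.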

lib/calico.libsonnet

@@ -230,6 +230,17 @@ local bindServiceAccountClusterRole(sa, cr) = kube.ClusterRoleBinding(cr.metadat
         },
     },
+    # ConfigMap that holds overridden bird.cfg.template and bird_ipam.cfg.template.
+    calicoMetallbBird: kube.ConfigMap("calico-metallb-bird") {
+        metadata+: {
+            namespace: cfg.namespace,
+        },
+        data: {
+            "bird.cfg.template": (importstr "calico-bird.cfg.template"),
+            "bird_ipam.cfg.template": (importstr "calico-bird-ipam.cfg.template"),
+        },
+    },
     nodeDaemon: kube.DaemonSet("calico-node") {
         metadata+: {
             namespace: cfg.namespace,
@@ -258,6 +269,7 @@ local bindServiceAccountClusterRole(sa, cr) = kube.ClusterRoleBinding(cr.metadat
                 xtables_lock: kube.HostPathVolume("/run/xtables.lock"),
                 var_run_calico: kube.HostPathVolume("/var/run/calico"),
                 var_lib_calico: kube.HostPathVolume("/var/lib/calico"),
+                bird_cfg_template: kube.ConfigMapVolume(env.calicoMetallbBird),
             },
             initContainers_: {
                 installCNI: kube.Container("install-cni") {
@@ -335,6 +347,16 @@ local bindServiceAccountClusterRole(sa, cr) = kube.ClusterRoleBinding(cr.metadat
                     var_lib_calico: { mountPath: "/var/lib/calico" },
                     secrets: { mountPath: env.cm.secretPrefix },
                 },
+                volumeMounts+: [
+                    { name: "bird-cfg-template",
+                      mountPath: "/etc/calico/confd/templates/bird.cfg.template",
+                      subPath: "bird.cfg.template"
+                    },
+                    { name: "bird-cfg-template",
+                      mountPath: "/etc/calico/confd/templates/bird_ipam.cfg.template",
+                      subPath: "bird_ipam.cfg.template"
+                    },
+                ],
},
},
},
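
For reference, the jsonnet above should render into the calico-node pod spec as something like this (a sketch; the bird-cfg-template volume name is assumed to be what kube.ConfigMapVolume derives from the bird_cfg_template key, matching the mounts above):

    volumes:
    - name: bird-cfg-template
      configMap:
        name: calico-metallb-bird
    ...
    volumeMounts:
    - name: bird-cfg-template
      mountPath: /etc/calico/confd/templates/bird.cfg.template
      subPath: bird.cfg.template
    - name: bird-cfg-template
      mountPath: /etc/calico/confd/templates/bird_ipam.cfg.template
      subPath: bird_ipam.cfg.template

Mounting with subPath overlays just these two files, leaving the rest of the stock /etc/calico/confd/templates directory from the calico-node image intact.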