forked from hswaw/hscloud
cluster: replace docker with containerd

This removes Docker and dockershim from our production Kubernetes and moves over to containerd/CRI. Docker support within Kubernetes was always slightly shitty, and with 1.20 the integration was deprecated for removal. CRI/containerd/runc is pretty much the new standard.

Change-Id: I98c89d5433f221b5fe766fcbef261fd72db530fe
parent 4b613303b1
commit 765e369255

3 changed files with 186 additions and 30 deletions
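A quick way to confirm that a node actually switched runtimes after this rolls out — a hedged sketch, assuming kubectl access and the CRI socket path from the containerd.toml below:

    # The node's advertised runtime should flip from docker://... to containerd://...
    kubectl get nodes -o wide

    # Talk to containerd's CRI endpoint directly, bypassing the kubelet.
    crictl --runtime-endpoint unix:///run/containerd/containerd.sock info
    crictl --runtime-endpoint unix:///run/containerd/containerd.sock ps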
cluster/nix/modules/containerd.toml (new file)
@@ -0,0 +1,134 @@
+version = 2
+root = "/var/lib/containerd"
+state = "/run/containerd"
+plugin_dir = ""
+disabled_plugins = []
+required_plugins = []
+oom_score = 0
+
+[grpc]
+  address = "/run/containerd/containerd.sock"
+  tcp_address = ""
+  tcp_tls_cert = ""
+  tcp_tls_key = ""
+  uid = 0
+  gid = 0
+  max_recv_message_size = 16777216
+  max_send_message_size = 16777216
+
+[ttrpc]
+  address = ""
+  uid = 0
+  gid = 0
+
+[debug]
+  address = ""
+  uid = 0
+  gid = 0
+  level = ""
+
+[metrics]
+  address = ""
+  grpc_histogram = false
+
+[cgroup]
+  path = ""
+
+[timeouts]
+  "io.containerd.timeout.shim.cleanup" = "5s"
+  "io.containerd.timeout.shim.load" = "5s"
+  "io.containerd.timeout.shim.shutdown" = "3s"
+  "io.containerd.timeout.task.state" = "2s"
+
+[plugins]
+  [plugins."io.containerd.gc.v1.scheduler"]
+    pause_threshold = 0.02
+    deletion_threshold = 0
+    mutation_threshold = 100
+    schedule_delay = "0s"
+    startup_delay = "100ms"
+  [plugins."io.containerd.grpc.v1.cri"]
+    disable_tcp_service = true
+    stream_server_address = "127.0.0.1"
+    stream_server_port = "0"
+    stream_idle_timeout = "4h0m0s"
+    enable_selinux = false
+    selinux_category_range = 1024
+    sandbox_image = "k8s.gcr.io/pause:3.2"
+    stats_collect_period = 10
+    systemd_cgroup = false
+    enable_tls_streaming = false
+    max_container_log_line_size = 16384
+    disable_cgroup = false
+    disable_apparmor = false
+    restrict_oom_score_adj = false
+    max_concurrent_downloads = 3
+    disable_proc_mount = false
+    unset_seccomp_profile = ""
+    tolerate_missing_hugetlb_controller = true
+    disable_hugetlb_controller = true
+    ignore_image_defined_volumes = false
+    [plugins."io.containerd.grpc.v1.cri".containerd]
+      snapshotter = "overlayfs"
+      default_runtime_name = "runc"
+      no_pivot = false
+      disable_snapshot_annotations = true
+      discard_unpacked_layers = false
+      [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
+        runtime_type = ""
+        runtime_engine = ""
+        runtime_root = ""
+        privileged_without_host_devices = false
+        base_runtime_spec = ""
+      [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
+        runtime_type = ""
+        runtime_engine = ""
+        runtime_root = ""
+        privileged_without_host_devices = false
+        base_runtime_spec = ""
+      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
+        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+          runtime_type = "io.containerd.runc.v2"
+          runtime_engine = ""
+          runtime_root = ""
+          privileged_without_host_devices = false
+          base_runtime_spec = ""
+          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+            SystemdCgroup = true
+    [plugins."io.containerd.grpc.v1.cri".cni]
+      bin_dir = "/opt/cni/bin"
+      conf_dir = "/opt/cni/conf"
+      max_conf_num = 1
+      conf_template = ""
+    [plugins."io.containerd.grpc.v1.cri".registry]
+      [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
+        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
+          endpoint = ["https://registry-1.docker.io"]
+    [plugins."io.containerd.grpc.v1.cri".image_decryption]
+      key_model = ""
+    [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
+      tls_cert_file = ""
+      tls_key_file = ""
+  [plugins."io.containerd.internal.v1.opt"]
+    path = "/opt/containerd"
+  [plugins."io.containerd.internal.v1.restart"]
+    interval = "10s"
+  [plugins."io.containerd.metadata.v1.bolt"]
+    content_sharing_policy = "shared"
+  [plugins."io.containerd.monitor.v1.cgroups"]
+    no_prometheus = false
+  [plugins."io.containerd.runtime.v1.linux"]
+    shim = "containerd-shim"
+    runtime = "runc"
+    runtime_root = ""
+    no_shim = false
+    shim_debug = false
+  [plugins."io.containerd.runtime.v2.task"]
+    platforms = ["linux/amd64"]
+  [plugins."io.containerd.service.v1.diff-service"]
+    default = ["walking"]
+  [plugins."io.containerd.snapshotter.v1.devmapper"]
+    root_path = ""
+    pool_name = ""
+    base_image_size = ""
+    async_remove = false
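The file above is essentially containerd's generated default config with the CRI-relevant parts pinned down (overlayfs snapshotter, runc v2 shim, SystemdCgroup). A hedged way to sanity-check edits to it is to let containerd itself parse and re-emit it, assuming this containerd version ships the config dump subcommand:

    # Parse the config and dump the effective result; invalid TOML fails loudly.
    containerd -c cluster/nix/modules/containerd.toml config dump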
cluster/nix/modules/kubelet.nix
@@ -16,7 +16,7 @@ let
     name = "pause";
     tag = "latest";
     contents = top.package.pause;
-    config.Cmd = "/bin/pause";
+    config.Cmd = ["/bin/pause"];
   };

   kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
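The Cmd fix above matters presumably because the OCI image config expects Cmd as a list of arguments, and containerd's importer is stricter about this than Docker was. Once a node is converted, the seeded pause image should be visible in containerd's k8s.io namespace; a hedged check:

    # CRI-visible images live in the k8s.io namespace, not ctr's default one.
    ctr -n=k8s.io images ls | grep pause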
@@ -45,12 +45,6 @@ let
   taints = concatMapStringsSep "," (v: "${v.key}=${v.value}:${v.effect}") (mapAttrsToList (n: v: v) cfg.taints);
 in
 {
-  imports = [
-    #(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "applyManifests" ] "")
-    #(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "cadvisorPort" ] "")
-    #(mkRemovedOptionModule [ "services" "kubernetes" "kubelet" "allowPrivileged" ] "")
-  ];
-
   # services/cluster/kubernetes/default.nix still wants to poke flannel,
   # but since we nuke that module we have to add a fake tunable for it.
   options.services.kubernetes.flannel = {
@@ -203,15 +197,57 @@ in
   (mkIf cfg.enable {
     services.kubernetes.kubelet.seedDockerImages = [infraContainer];

+    # Drop crictl into administrative command line.
+    environment.systemPackages = with pkgs; [ cri-tools ];
+
+    # Force disable Docker.
+    virtualisation.docker.enable = false;
+
+    # TODO(q3k): move to unified cgroups (cgroup v2) once we upgrade to
+    # Kubelet 1.19.
+    systemd.enableUnifiedCgroupHierarchy = false;
+
+    # Run containerd service. This exposes the CRI API that is consumed by
+    # crictl and Kubelet.
+    systemd.services.containerd = {
+      description = "containerd container runtime";
+      wantedBy = [ "kubernetes.target" ];
+      after = [ "network.target" ];
+      path = with pkgs; [ runc iptables ];
+      serviceConfig = {
+        Delegate = "yes";
+        KillMode = "process";
+        Restart = "always";
+        RestartSec = "5";
+        LimitNPROC = "infinity";
+        LimitCORE = "infinity";
+        # https://github.com/coreos/fedora-coreos-tracker/issues/329
+        LimitNOFILE = "1048576";
+        TasksMax = "infinity";
+        OOMScoreAdjust = "-999";
+
+        ExecStart = "${pkgs.containerd}/bin/containerd -c ${./containerd.toml}";
+      };
+    };
+
     systemd.services.kubelet = {
       description = "Kubernetes Kubelet Service";
       wantedBy = [ "kubernetes.target" ];
-      after = [ "network.target" "docker.service" "kube-apiserver.service" ];
-      path = with pkgs; [ gitMinimal openssh docker utillinux iproute ethtool thin-provisioning-tools iptables socat ] ++ top.path;
+      after = [ "network.target" "containerd.service" "kube-apiserver.service" ];
+      path = with pkgs; [ gitMinimal openssh utillinux iproute ethtool thin-provisioning-tools iptables socat cri-tools containerd gzip ] ++ top.path;
+
+      # Mildly hacky - by moving over to OCI image build infrastructure in
+      # NixOS we should be able to get rid of the gunzip.
+      # TODO(q3k): figure this out, check if this is even being used by
+      # kubelet.
       preStart = ''
         ${concatMapStrings (img: ''
-          echo "Seeding docker image: ${img}"
-          docker load <${img}
+          echo "Seeding OCI image: ${img}"
+          cp ${img} /tmp/image.tar.gz
+          rm -f /tmp/image.tar
+          gunzip /tmp/image.tar.gz
+          ctr -n=k8s.io images import /tmp/image.tar || true
+          rm /tmp/image.tar
         '') cfg.seedDockerImages}
       '';
       serviceConfig = {
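For reference, each iteration of the new seeding loop above expands to roughly the following for a single image (the store path here is hypothetical); the gunzip dance exists presumably because dockerTools emits gzipped tarballs while ctr's importer wants a plain tar, as the TODO above also hints:

    # Hypothetical store path of one seeded image tarball.
    img=/nix/store/...-pause.tar.gz
    cp "$img" /tmp/image.tar.gz
    rm -f /tmp/image.tar
    gunzip /tmp/image.tar.gz
    # Import into the k8s.io namespace so the CRI plugin (and kubelet) can see it.
    ctr -n=k8s.io images import /tmp/image.tar || true
    rm /tmp/image.tar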
@@ -221,6 +257,9 @@ in
         Restart = "on-failure";
         RestartSec = "1000ms";
         ExecStart = ''${cfg.package}/bin/kubelet \
+          --cgroup-driver=systemd \
+          --container-runtime=remote \
+          --container-runtime-endpoint=unix:///var/run/containerd/containerd.sock \
           --address=${cfg.address} \
           --authentication-token-webhook \
           --authentication-token-webhook-cache-ttl="10s" \
@@ -263,7 +302,8 @@ in
     };
   };

-  boot.kernelModules = ["br_netfilter"];
+  boot.kernelModules = [ "br_netfilter" "overlay" ];
+  boot.kernel.sysctl."net.ipv4.ip_forward" = "1";

   services.kubernetes.kubelet.hostname = with config.networking;
     mkDefault (hostName + optionalString (domain != null) ".${domain}");
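The three new kubelet flags above are the actual runtime switch: remote CRI over containerd's unix socket instead of the built-in dockershim. Note that --cgroup-driver=systemd has to agree with SystemdCgroup = true in containerd.toml; a mismatch between the two is a classic cause of pods stuck in ContainerCreating. A hedged pair of checks on a converted node:

    # Which config is the running containerd using? (ExecStart carries the -c path.)
    systemctl cat containerd.service | grep ExecStart
    # Kubelet side: the rendered unit should carry --cgroup-driver=systemd.
    systemctl cat kubelet.service | grep -- --cgroup-driver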
@@ -30,24 +30,6 @@ in rec {
     ./kubelet.nix
   ];

-  # List services that you want to enable:
-  virtualisation.docker.enable = true;
-  virtualisation.docker.extraOptions = "--iptables=false --ip-masq=false --ip-forward=true";
-
-  # Docker 1.13 sets iptables FORWARD to DROP. Unfuck this.
-  systemd.services."docker-iptables-unfuck" = {
-    enable = true;
-    wantedBy = [ "kubernetes.target" ];
-    description = "Docker iptable Unfuck";
-    after = [ "docker.service" ];
-    requires = [ "docker.service" ];
-    path = [ pkgs.iptables ];
-    script = ''
-      iptables -P FORWARD ACCEPT
-    '';
-    serviceConfig.Type = "oneshot";
-  };
-
   networking.firewall.enable = false;

   # Point k8s apiserver address at ourselves, as every machine runs an apiserver with this cert name.
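The deleted docker-iptables-unfuck unit existed only because dockerd flips the iptables FORWARD policy to DROP on startup, breaking pod-to-pod traffic; with Docker gone there is nothing left to fight. Still worth a hedged one-off check on converted nodes:

    # With dockerd no longer flipping the policy, this should print -P FORWARD ACCEPT.
    iptables -S FORWARD | head -n 1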