From 55a486ae49f1c92d9d4658fcd902d53ee568d81d Mon Sep 17 00:00:00 2001
From: Serge Bazanski
Date: Sat, 11 Jun 2022 18:27:01 +0000
Subject: [PATCH] cluster: refactor nix machinery to fit //ops

This is a chonky refactor that gets rid of the previous cluster-centric
defs-* plain nix file setup.

Now, nodes are configured individually in plain NixOS modules, and are
provided a view of all other nodes in the 'machines' attribute. Cluster
logic is moved into modules which inspect this attribute set to find
other nodes within the same cluster. (A short sketch of how this
composes is appended after the diff.)

Kubernetes options are not fully clusterified yet (i.e., they are still
hardcoded to only provide the 'k0' cluster), but that can be fixed
later. The Ceph machinery is a good example of how that can be done.

The new NixOS configs are zero-diff against prod. While this is done
mostly by keeping the logic as-is, we had to keep a few newly
discovered 'bugs' around by adding some temporary options which keep
things as they are. These will be removed in a future CL, which will
then introduce a diff (but, hopefully, no functional changes).

We also remove the nix eval from clustercfg, as it was no longer used
(basically since we refactored certs at some point).

Change-Id: Id79772a96249b0e6344046f96f9c2cb481c4e1f4
Reviewed-on: https://gerrit.hackerspace.pl/c/hscloud/+/1322
Reviewed-by: informatic
---
 cluster/clustercfg/clustercfg.py | 19 --
 cluster/machines/bc01n01.hswaw.net.nix | 39 ++++
 cluster/machines/bc01n02.hswaw.net.nix | 36 ++++
 cluster/machines/dcr01s22.hswaw.net.nix | 41 ++++
 cluster/machines/dcr01s24.hswaw.net.nix | 41 ++++
 cluster/machines/modules/base.nix | 100 +++++++++
 cluster/{nix => machines}/modules/ceph.nix | 116 +++++++----
 .../{nix => machines}/modules/containerd.toml | 0
 cluster/machines/modules/kube-common.nix | 94 +++++++++
 .../machines/modules/kube-controlplane.nix | 178 ++++++++++++++++
 cluster/machines/modules/kube-dataplane.nix | 96 +++++++++
 cluster/{nix => machines}/modules/kubelet.nix | 0
 cluster/nix/defs-cluster-k0.nix | 130 ------------
 cluster/nix/defs-machines.nix | 58 ------
 cluster/nix/modules/base.nix | 77 -------
 cluster/nix/modules/kubernetes.nix | 195 ------------------
 ops/machines.nix | 55 +++--
 17 files changed, 730 insertions(+), 545 deletions(-)
 create mode 100644 cluster/machines/bc01n01.hswaw.net.nix
 create mode 100644 cluster/machines/bc01n02.hswaw.net.nix
 create mode 100644 cluster/machines/dcr01s22.hswaw.net.nix
 create mode 100644 cluster/machines/dcr01s24.hswaw.net.nix
 create mode 100644 cluster/machines/modules/base.nix
 rename cluster/{nix => machines}/modules/ceph.nix (59%)
 rename cluster/{nix => machines}/modules/containerd.toml (100%)
 create mode 100644 cluster/machines/modules/kube-common.nix
 create mode 100644 cluster/machines/modules/kube-controlplane.nix
 create mode 100644 cluster/machines/modules/kube-dataplane.nix
 rename cluster/{nix => machines}/modules/kubelet.nix (100%)
 delete mode 100644 cluster/nix/defs-cluster-k0.nix
 delete mode 100644 cluster/nix/defs-machines.nix
 delete mode 100644 cluster/nix/modules/base.nix
 delete mode 100644 cluster/nix/modules/kubernetes.nix

diff --git a/cluster/clustercfg/clustercfg.py b/cluster/clustercfg/clustercfg.py
index 7024f4ad..30b87b82 100644
--- a/cluster/clustercfg/clustercfg.py
+++ b/cluster/clustercfg/clustercfg.py
@@ -38,21 +38,6 @@
 sh.setFormatter(formatter)
 logger.addHandler(sh)
 
-
-def pki_config(key, fqdn):
-    machine_name = fqdn.split('.')[0]
-    raw = subprocess.check_output([
-        'nix', 'eval', '--raw', '--impure', '--expr',
-        '( ((import ' + local_root + 
'/cluster/nix/defs-cluster-k0.nix ) "' + machine_name + '").pki.' + key + '.json )', - ]) - return json.loads(raw) - - -def _file_exists(c, filename): - res = c.run('stat "{}"'.format(filename), warn=True, hide=True) - return res.exited == 0 - - def configure_k8s(username, ca, cert, key): subprocess.check_call([ 'kubectl', 'config', @@ -150,9 +135,6 @@ def nodestrap(args, nocerts=False): r = fabric.Connection('root@{}'.format(fqdn)) if not nocerts: - cfg = dict((k, pki_config(k, fqdn)) for k in [ - 'etcdPeer', 'etcd.server', 'etcd.kube' - ]) certs_root = os.path.join(local_root, 'cluster/certs') # Make etcd peer certificate for node. @@ -187,7 +169,6 @@ def nodestrap(args, nocerts=False): # Make kube component certificates. kube_components = ['controllermanager', 'scheduler', 'proxy'] - cfg = dict((k, pki_config('kube.' + k, fqdn)) for k in kube_components) for k in kube_components: # meh if k == 'controllermanager': diff --git a/cluster/machines/bc01n01.hswaw.net.nix b/cluster/machines/bc01n01.hswaw.net.nix new file mode 100644 index 00000000..defcbca8 --- /dev/null +++ b/cluster/machines/bc01n01.hswaw.net.nix @@ -0,0 +1,39 @@ +{ config, pkgs, ... }: + +with builtins; + +rec { + networking.hostName = "bc01n01"; + # TODO: undefine fqdn and define domain after big nix change + hscloud.base.fqdn = "${networking.hostName}.hswaw.net"; + #networking.domain = "hswaw.net"; + system.stateVersion = "18.09"; + nix.maxJobs = 16; + + boot.loader.grub.device = "/dev/disk/by-id/scsi-360024e8078a9060023b1043107388af5"; + fileSystems."/".device = "/dev/disk/by-uuid/518ecac1-00ea-4ef0-9418-9eca6ce6d918"; + + hscloud.base = { + mgmtIf = "eno1"; + ipAddr = "185.236.240.35"; + ipAddrBits = 28; + gw = "185.236.240.33"; + }; + + hscloud.kube.control.enable = true; + hscloud.kube.data = { + enable = true; + podNet = "10.10.16.0/24"; + }; + + hscloud.ceph = { + name = "k0"; + fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354"; + enable = true; + }; + + environment.systemPackages = [ + pkgs.shadow + ]; +} + diff --git a/cluster/machines/bc01n02.hswaw.net.nix b/cluster/machines/bc01n02.hswaw.net.nix new file mode 100644 index 00000000..43b61d08 --- /dev/null +++ b/cluster/machines/bc01n02.hswaw.net.nix @@ -0,0 +1,36 @@ +{ config, pkgs, ... }: + +with builtins; + +rec { + networking.hostName = "bc01n02"; + # TODO: undefine fqdn and define domain after big nix change + hscloud.base.fqdn = "${networking.hostName}.hswaw.net"; + #networking.domain = "hswaw.net"; + system.stateVersion = "18.09"; + nix.maxJobs = 16; + + boot.loader.grub.device = "/dev/disk/by-id/scsi-360024e8078b0250023b10f8706d3c99e"; + fileSystems."/".device = "/dev/disk/by-uuid/2d45c87b-029b-463e-a7cb-afd5a3089327"; + + hscloud.base = { + mgmtIf = "eno1"; + ipAddr = "185.236.240.36"; + ipAddrBits = 28; + gw = "185.236.240.33"; + }; + + hscloud.kube = { + control.enable = true; + data.enable = true; + data.podNet = "10.10.17.0/24"; + }; + + hscloud.ceph = { + name = "k0"; + fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354"; + + control.enable = true; + }; +} + diff --git a/cluster/machines/dcr01s22.hswaw.net.nix b/cluster/machines/dcr01s22.hswaw.net.nix new file mode 100644 index 00000000..742a5413 --- /dev/null +++ b/cluster/machines/dcr01s22.hswaw.net.nix @@ -0,0 +1,41 @@ +{ config, pkgs, ... 
}: + +with builtins; + +rec { + networking.hostName = "dcr01s22"; + # TODO: undefine fqdn and define domain after big nix change + hscloud.base.fqdn = "${networking.hostName}.hswaw.net"; + #networking.domain = "hswaw.net"; + system.stateVersion = "19.09"; + nix.maxJobs = 48; + + boot.loader.grub.device = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNX1M604518E"; + fileSystems."/".device = "/dev/disk/by-uuid/b4149083-49fe-4951-a143-aff4cedaf33a"; + + hscloud.base = { + mgmtIf = "enp130s0f0"; + ipAddr = "185.236.240.39"; + ipAddrBits = 28; + gw = "185.236.240.33"; + }; + + hscloud.kube = { + control.enable = true; + data.enable = true; + data.podNet = "10.10.19.0/24"; + }; + + hscloud.ceph = { + name = "k0"; + fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354"; + + osd.devices = [ + { id = 0; path = "/dev/disk/by-id/scsi-35000c500850293e3"; uuid = "314034c5-474c-4d0d-ba41-36a881c52560";} + { id = 1; path = "/dev/disk/by-id/scsi-35000c500850312cb"; uuid = "a7f1baa0-0fc3-4ab1-9895-67abdc29de03";} + { id = 2; path = "/dev/disk/by-id/scsi-35000c5008508e3ef"; uuid = "11ac8316-6a87-48a7-a0c7-74c3cef6c2fa";} + { id = 3; path = "/dev/disk/by-id/scsi-35000c5008508e23f"; uuid = "c6b838d1-b08c-4788-936c-293041ed2d4d";} + ]; + }; +} + diff --git a/cluster/machines/dcr01s24.hswaw.net.nix b/cluster/machines/dcr01s24.hswaw.net.nix new file mode 100644 index 00000000..c3ad18e6 --- /dev/null +++ b/cluster/machines/dcr01s24.hswaw.net.nix @@ -0,0 +1,41 @@ +{ config, pkgs, ... }: + +with builtins; + +rec { + networking.hostName = "dcr01s24"; + # TODO: undefine fqdn and define domain after big nix change + hscloud.base.fqdn = "${networking.hostName}.hswaw.net"; + #networking.domain = "hswaw.net"; + system.stateVersion = "19.09"; + nix.maxJobs = 48; + + boot.loader.grub.device = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNF0M717009H"; + fileSystems."/".device = "/dev/disk/by-uuid/fc5c6456-5bbd-4b9e-a93e-7f9073ffe09a"; + + hscloud.base = { + mgmtIf = "enp130s0f0"; + ipAddr = "185.236.240.40"; + ipAddrBits = 28; + gw = "185.236.240.33"; + }; + + hscloud.kube = { + control.enable = true; + data.enable = true; + data.podNet = "10.10.20.0/24"; + }; + + hscloud.ceph = { + name = "k0"; + fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354"; + + osd.devices = [ + { id = 4; path = "/dev/disk/by-id/scsi-35000c5008509199b"; uuid = "a2b4663d-bd8f-49b3-b0b0-195c56ba252f";} + { id = 5; path = "/dev/disk/by-id/scsi-35000c50085046abf"; uuid = "a2242989-ccce-4367-8813-519b64b5afdb";} + { id = 6; path = "/dev/disk/by-id/scsi-35000c5008502929b"; uuid = "7deac89c-22dd-4c2b-b3cc-43ff7f990fd6";} + { id = 7; path = "/dev/disk/by-id/scsi-35000c5008502a323"; uuid = "e305ebb3-9cac-44d2-9f1d-bbb72c8ab51f";} + ]; + }; +} + diff --git a/cluster/machines/modules/base.nix b/cluster/machines/modules/base.nix new file mode 100644 index 00000000..66335ef8 --- /dev/null +++ b/cluster/machines/modules/base.nix @@ -0,0 +1,100 @@ +{ config, pkgs, lib, ... }: + +with lib; + +let + cfg = config.hscloud.base; + +in { + options.hscloud.base = { + fqdn = mkOption { + type = types.str; + description = "Node's FQDN."; + default = "${config.networking.hostName}.${config.networking.domain}"; + }; + mgmtIf = mkOption { + type = types.str; + description = "Main network interface. 
Called mgmtIf for legacy reasons."; + }; + ipAddr = mkOption { + type = types.str; + description = "IPv4 address on main network interface."; + }; + ipAddrBits = mkOption { + type = types.int; + description = "IPv4 CIDR mask bits."; + }; + gw = mkOption { + type = types.str; + description = "IPv4 address of gateway."; + }; + }; + config = rec { + boot.loader.grub.enable = true; + boot.loader.grub.version = 2; + + fileSystems."/" = + { # device = ""; needs to be defined + fsType = "ext4"; + }; + swapDevices = [ ]; + + boot.kernelPackages = pkgs.linuxPackages_latest; + boot.kernelParams = [ "boot.shell_on_fail" ]; + boot.kernel.sysctl."net.ipv4.conf.all.rp_filter" = "0"; + boot.kernel.sysctl."net.ipv4.conf.default.rp_filter" = "0"; + boot.initrd.availableKernelModules = [ "uhci_hcd" "ehci_pci" "megaraid_sas" "usb_storage" "usbhid" "sd_mod" "sr_mod" ]; + boot.kernelModules = [ "kvm-intel" ]; + boot.extraModulePackages = []; + hardware.enableRedistributableFirmware = true; + + time.timeZone = "Europe/Warsaw"; + + environment.systemPackages = with pkgs; [ + wget vim htop tcpdump + rxvt_unicode.terminfo + ]; + programs.mtr.enable = true; + + networking.useDHCP = false; + networking.interfaces."${cfg.mgmtIf}" = { + ipv4.addresses = [ + { + address = cfg.ipAddr; + prefixLength = cfg.ipAddrBits; + } + ]; + }; + networking.defaultGateway = cfg.gw; + networking.nameservers = ["185.236.240.1"]; + + # Instead of using nixpkgs from the root/nixos channel, use pkgs pin from this file. + nix.nixPath = [ "nixpkgs=${pkgs.path}" "nixos-config=/etc/nixos/configuration.nix" ]; + + # Otherwise fetchGit nixpkgs pin fails. + systemd.services.nixos-upgrade.path = [ pkgs.git ]; + + # Use Chrony instead of systemd-timesyncd + services.chrony.enable = true; + + # Symlink lvm into /sbin/lvm on activation. This is needed by Rook OSD + # instances running on Kubernetes. + # See: https://github.com/rook/rook/commit/f3c4975e353e3ce3599c958ec6d2cae8ee8f6f61 + system.activationScripts.sbinlvm = + '' + mkdir -m 0755 -p /sbin + ln -sfn ${pkgs.lvm2.bin}/bin/lvm /sbin/lvm + ''; + + # Enable the OpenSSH daemon. 
+ services.openssh.enable = true; + users.users.root.openssh.authorizedKeys.keys = [ + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD4VJXAXEHEXZk2dxNwehneuJcEGkfXG/U7z4fO79vDVIENdedtXQUyLyhZJc5RTEfHhQj66FwIqzl7mzBHd9x9PuDp6QAYXrkVNMj48s6JXqZqBvF6H/weRqFMf4a2TZv+hG8D0kpvmLheCwWAVRls7Jofnp/My+yDd57GMdsbG/yFEf6WPMiOnA7hxdSJSVihCsCSw2p8PD4GhBe8CVt7xIuinhutjm9zYBjV78NT8acjDUfJh0B1ODTjs7nuW1CC4jybSe2j/OU3Yczj4AxRxBNWuFxUq+jBo9BfpbKLh+Tt7re+zBkaicM77KM/oV6943JJxgHNBBOsv9scZE7 q3k@amnesia" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIG599UildOrAq+LIOQjKqtGMwjgjIxozI1jtQQRKHtCP q3k@mimeomia" + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQb3YQoiYFZLKwvHYKbu1bMqzNeDCAszQhAe1+QI5SLDOotclyY/vFmOReZOsmyMFl71G2d7d+FbYNusUnNNjTxRYQ021tVc+RkMdLJaORRURmQfEFEKbai6QSFTwErXzuoIzyEPK0lbsQuGgqT9WaVnRzHJ2Q/4+qQbxAS34PuR5NqEkmn4G6LMo3OyJ5mwPkCj9lsqz4BcxRaMWFO3mNcwGDfSW+sqgc3E8N6LKrTpZq3ke7xacpQmcG5DU9VO+2QVPdltl9jWbs3gXjmF92YRNOuKPVfAOZBBsp8JOznfx8s9wDgs7RwPmDpjIAJEyoABqW5hlXfqRbTnfnMvuR informatic@InformaticPC" + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDGkMgEVwQM8yeuFUYL2TwlJIq9yUNBmHnwce46zeL2PK2CkMz7sxT/om7sp/K5XDiqeD05Nioe+Dr3drP6B8uI33S5NgxPIfaqQsRS+CBEgk6cqFlcdlKETU/DT+/WsdoO173n7mgGeafPInEuQuGDUID0Fl099kIxtqfAhdeZFMM6/szAZEZsElLJ8K6dp1Ni/jmnXCZhjivZH3AZUlnqrmtDG7FY1bgcOfDXAal45LItughGPtrdiigXe9DK2fW3+9DBZZduh5DMJTNlphAZ+nfSrbyHVKUg6WsgMSprur4KdU47q1QwzqqvEj75JcdP1jOWoZi4F6VJDte9Wb9lhD1jGgjxY9O6Gs4CH35bx15W7CN9hgNa0C8NbPJe/fZYIeMZmJ1m7O2xmnYwP8j+t7RNJWu7Pa3Em4mOEXvhBF07Zfq+Ye/4SluoRgADy5eII2x5fFo5EBhInxK0/X8wF6XZvysalVifoCh7T4Edejoi91oAxFgYAxbboXGlod0eEHIi2hla8SM9+IBHOChmgawKBYp2kzAJyAmHNBF+Pah9G4arVCj/axp/SJZDZbJQoI7UT/fJzEtvlb5RWrHXRq+y6IvjpUq4pzpDWW04+9UMqEEXRmhWOakHfEVM9rN8h3aJBflLUBBnh0Z/hVsKNh8bCRHaKtah8TrD9i+wMw== patryk.jakuszew@gmail.com" + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC33naG1ptCvUcRWX9cj9wXM1nW1lyQC4SvMJzWlr9aMD96O8hQ2JMkuIUgUJvorAY02QRplQ2BuoVoVkdkzwjMyi1bL3OdgcKo7Z1yByClGTTocqNJYY0lcUb6EJH8+6e6F9ydrQlSxNzL1uCaA7phZr+yPcmAmWbSfioXn98yXNkE0emHxzJv/nypJY56sDCMC2IXDRd8L2goDtPwgPEW7bWfAQdIFMJ75xOidZOTxJ8eqyXLw/kxY5UlyX66jdoYz1sE5XUHuoQl1AOG9UdlMo0aMhUvP4pX5l7r7EnA9OttKMFB3oWqkVK/R6ynZ52YNOU5BZ9V+Ppaj34W0xNu+p0mbHcCtXYCTrf/OU0hcZDbDaNTjs6Vtcm2wYw9iAKX7Tex+eOMwUwlrlcyPNRV5BTot7lGNYfauHCSIuWJKN4NhCLR/NtVNh4/94eKkPTwJsY6XqDcS7q49wPAs4DAH7BJgsbHPOqygVHrY0YYEfz3Pj0HTxJHQMCP/hQX4fXEGt0BjgoVJbXPAQtPyeg0JuxiUg+b4CgVVfQ6R060MlM1BZzhmh+FY5MJH6nJppS0aHYCvSg8Z68NUlCPKy0jpcyfuAIWQWwSGG1O010WShQG2ELsvNdg5/4HVdCGNl5mmoom6JOd72FOZyQlHDFfeQUQRn9HOeCq/c51rK99SQ== bartek@IHM" + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICTR292kx/2CNuWYIsZ6gykQ036aBGrmheIuZa6S1D2x implr@thonk" + ]; + }; +} diff --git a/cluster/nix/modules/ceph.nix b/cluster/machines/modules/ceph.nix similarity index 59% rename from cluster/nix/modules/ceph.nix rename to cluster/machines/modules/ceph.nix index c258f5bc..4f15bdd3 100644 --- a/cluster/nix/modules/ceph.nix +++ b/cluster/machines/modules/ceph.nix @@ -18,23 +18,27 @@ # don't have hundreds of clusters, none of the above is automated, especially # as that kind of automation is quite tricky to do reliably. -{ config, lib, pkgs, ... }: +{ config, lib, pkgs, machines, ... }: -with builtins; with lib; -with (( import ../defs-cluster-k0.nix ) config.networking.hostName); - let + cfg = config.hscloud.ceph; + + allNodes = let + list = mapAttrsToList (_: v: v) machines; + filtered = filter (m: (m.config ? 
hscloud.ceph) && (m.config.hscloud.ceph.enable)) list; + sorted = sort (a: b: a.config.hscloud.base.fqdn < b.config.hscloud.base.fqdn) filtered; + in sorted; + + monNodes = filter (m: m.config.hscloud.ceph.control.enable) allNodes; + machineName = config.networking.hostName; - isMon = hasAttr machineName cephCluster.mons; - isOsd = hasAttr machineName cephCluster.osds; - hasCeph = isMon || isOsd; # This NixOS Ceph option fragment is present on every machine that runs a # mon, and basically tells the NixOS machinery to run mons/mgrs if needed on # this machine. - cephMonConfig = if isMon then { + cephMonConfig = if cfg.control.enable then { mon = { enable = true; daemons = [ machineName ]; @@ -46,10 +50,10 @@ let } else {}; # Same as for cephMonConfig, but this time for OSDs. - cephOsdConfig = if isOsd then { + cephOsdConfig = if (length cfg.osd.devices) > 0 then { osd = { enable = true; - daemons = map (el: "${toString el.id}") cephCluster.osds.${machineName}; + daemons = map (el: "${toString el.id}") cfg.osd.devices; }; rgw = { enable = true; @@ -57,19 +61,6 @@ let }; } else {}; - # The full option fragment for services.ceph. It contains ceph.conf fragments - # (in .global.*) and merges ceph{Mon,Osd}Config. - cephConfig = { - enable = true; - global = { - fsid = cephCluster.fsid; - clusterName = cephCluster.name; - - # Every Ceph node always attempts to connect to all mons. - monHost = concatStringsSep "," (mapAttrsToList (k: _: machinesByName.${k}.ipAddr) cephCluster.mons); - monInitialMembers = concatStringsSep "," (builtins.attrNames cephCluster.mons); - }; - } // cephMonConfig // cephOsdConfig; # Merge ceph-volume lvm activate into ceph-osd-ID services. # @@ -113,7 +104,7 @@ let ("+" + (toString (pkgs.writeScript "ceph-osd-${osdId}-activate.sh" '' #!/bin/sh set -e - dir="/var/lib/ceph/osd/${cephCluster.name}-${osdId}/" + dir="/var/lib/ceph/osd/${cfg.name}-${osdId}/" disk="${el.path}" uuid="${osdUuid}" if [ -d "$dir" ] && [ -f "$dir"/keyring ]; then @@ -125,25 +116,78 @@ let ''))) - "${pkgs.ceph.lib}/libexec/ceph/ceph-osd-prestart.sh --id ${osdId} --cluster ${cephCluster.name}" + "${pkgs.ceph.lib}/libexec/ceph/ceph-osd-prestart.sh --id ${osdId} --cluster ${cfg.name}" ]; }; unitConfig = { ConditionPathExists = lib.mkForce el.path; }; }; - }) (if isOsd then cephCluster.osds.${machineName} else [])); + }) cfg.osd.devices); in rec { - services.ceph = if hasCeph then cephConfig else {}; + options = { + hscloud.ceph = { + enable = mkOption { + type = types.bool; + description = "Enable Ceph storage cluster (native NixOS), not rook."; + default = ((length cfg.osd.devices) > 0) || cfg.control.enable; + }; + name = mkOption { + type = types.str; + description = "Short identifier of cluster."; + }; + fsid = mkOption { + type = types.str; + description = "UUID of cluster, as generated by first mon."; + }; + control = { + enable = mkEnableOption "mon and mgr on this host"; + }; + osd = { + devices = mkOption { + type = types.listOf (types.submodule { + options = { + id = mkOption { + description = "Numeric ID of OSD."; + type = types.int; + }; + path = mkOption { + description = "Path to underlying block device for OSD storage."; + type = types.str; + }; + uuid = mkOption { + description = "UUID of generated OSD storage."; + type = types.str; + }; + }; + }); + default = []; + }; + }; + }; + }; + config = mkIf cfg.enable { + services.ceph = { + enable = cfg.control.enable || (length cfg.osd.devices) > 0; + global = { + fsid = cfg.fsid; + clusterName = cfg.name; - environment.systemPackages = with 
pkgs; [ - ceph cryptsetup smartmontools - ]; - - systemd.services = osdActivateServices; - - # Hack - the upstream ceph module should generate ${clusterName}.conf instead - # of ceph.conf, let's just symlink it. - environment.etc."ceph/${cephCluster.name}.conf".source = "/etc/ceph/ceph.conf"; + # Every Ceph node always attempts to connect to all mons. + monHost = concatStringsSep "," (map (n: n.config.hscloud.base.ipAddr) monNodes); + monInitialMembers = concatStringsSep "," (map (n: n.config.networking.hostName) monNodes); + }; + } // cephMonConfig // cephOsdConfig; + + environment.systemPackages = with pkgs; [ + ceph cryptsetup smartmontools + ]; + + systemd.services = osdActivateServices; + + # Hack - the upstream ceph module should generate ${clusterName}.conf instead + # of ceph.conf, let's just symlink it. + environment.etc."ceph/${cfg.name}.conf".source = "/etc/ceph/ceph.conf"; + }; } diff --git a/cluster/nix/modules/containerd.toml b/cluster/machines/modules/containerd.toml similarity index 100% rename from cluster/nix/modules/containerd.toml rename to cluster/machines/modules/containerd.toml diff --git a/cluster/machines/modules/kube-common.nix b/cluster/machines/modules/kube-common.nix new file mode 100644 index 00000000..6707efaf --- /dev/null +++ b/cluster/machines/modules/kube-common.nix @@ -0,0 +1,94 @@ +{ config, pkgs, lib, machines, ... }: + +with lib; + +let + cfg = config.hscloud.kube; + fqdn = config.hscloud.base.fqdn; + +in { + options.hscloud.kube = { + package = mkOption { + description = "Kubernetes package to use for everything but kubelet."; + type = types.package; + default = (import (fetchGit { + # Now at 1.16.5 + name = "nixos-unstable-2020-01-22"; + url = https://github.com/nixos/nixpkgs-channels/; + rev = "a96ed5d70427bdc2fbb9e805784e1b9621157a98"; + }) {}).kubernetes; + defaultText = "pkgs.kubernetes"; + }; + packageKubelet = mkOption { + description = "Kubernetes package to use for kubelet."; + type = types.package; + default = cfg.package; + defaultText = "pkgs.kubernetes"; + }; + portAPIServerSecure = mkOption { + type = types.int; + description = "Port at which k8s apiserver will listen."; + default = 4001; + }; + pki = let + mk = (radix: name: rec { + ca = ./../../certs + "/ca-${radix}.crt"; + cert = ./../../certs + "/${radix}-${name}.cert"; + key = ./../../secrets/plain + "/${radix}-${name}.key"; + }); + mkKube = (name: (mk "kube" name) // { + config = { + server = "https://k0.hswaw.net:${toString cfg.portAPIServerSecure}"; + certFile = (mk "kube" name).cert; + keyFile = (mk "kube" name).key; + }; + }); + in mkOption { + type = types.attrs; + default = { + kube = rec { + ca = apiserver.ca; + + # Used to identify apiserver. + apiserver = mkKube "apiserver"; + + # Used to identify controller-manager. + controllermanager = mkKube "controllermanager"; + + # Used to identify scheduler. + scheduler = mkKube "scheduler"; + + # Used to encrypt service accounts. + serviceaccounts = mkKube "serviceaccounts"; + + # Used to identify kube-proxy. + proxy = mkKube "proxy"; + + # Used to identify kubelet. + kubelet = mkKube "kubelet-${fqdn}"; + }; + + kubeFront = { + apiserver = mk "kubefront" "apiserver"; + }; + + etcd = { + peer = mk "etcdpeer" fqdn; + server = mk "etcd" fqdn; + kube = mk "etcd" "kube"; + }; + }; + }; + }; + + config = { + services.kubernetes = { + # We do not use any nixpkgs predefined roles for k8s. Instead, we enable + # k8s components manually. 
+ roles = []; + caFile = cfg.pki.kube.apiserver.ca; + clusterCidr = "10.10.16.0/20"; + addons.dns.enable = false; + }; + }; +} diff --git a/cluster/machines/modules/kube-controlplane.nix b/cluster/machines/modules/kube-controlplane.nix new file mode 100644 index 00000000..8efda584 --- /dev/null +++ b/cluster/machines/modules/kube-controlplane.nix @@ -0,0 +1,178 @@ +{ config, pkgs, lib, machines, ... }: + +with lib; + +let + cfg = config.hscloud.kube.control; + + # All control plane nodes. + allNodes = let + list = mapAttrsToList (_: v: v) machines; + filtered = filter (m: (m.config ? hscloud.kube.control) && (m.config.hscloud.kube.control.enable)) list; + sorted = sort (a: b: a.config.hscloud.base.fqdn < b.config.hscloud.base.fqdn) filtered; + in sorted; + + # All control plane nodes that aren't the node being evaluated. + otherNodes = (filter (m: m.config.networking.hostName != config.networking.hostName) allNodes); + + fqdn = config.hscloud.base.fqdn; + + pki = config.hscloud.kube.pki; + +in { + imports = [ + ./kube-common.nix + ]; + + options.hscloud.kube.control = { + enable = mkEnableOption "kubernetes control plane"; + portControllerManagerSecure = mkOption { + type = types.int; + description = "Port at which k8s controller-manager will listen."; + default = 4003; + }; + portSchedulerSecure = mkOption { + type = types.int; + description = "Port at which k8s scheduler will listen."; + default = 4005; + }; + }; + + config = mkIf cfg.enable { + networking.firewall.enable = false; + + # Point k8s apiserver address at ourselves, as we _are_ the apiserver. + networking.extraHosts = '' + 127.0.0.1 k0.hswaw.net + ''; + + services.etcd = rec { + enable = true; + name = fqdn; + listenClientUrls = ["https://0.0.0.0:2379"]; + advertiseClientUrls = ["https://${fqdn}:2379"]; + listenPeerUrls = ["https://0.0.0.0:2380"]; + initialAdvertisePeerUrls = ["https://${fqdn}:2380"]; + initialCluster = (map (n: "${n.config.hscloud.base.fqdn}=https://${n.config.hscloud.base.fqdn}:2380") allNodes); + initialClusterState = "existing"; + + clientCertAuth = true; + trustedCaFile = pki.etcd.server.ca; + certFile = pki.etcd.server.cert; + keyFile = pki.etcd.server.key; + + peerClientCertAuth = true; + peerTrustedCaFile = pki.etcd.peer.ca; + peerCertFile = pki.etcd.peer.cert; + peerKeyFile = pki.etcd.peer.key; + + extraConf = { + PEER_CLIENT_CERT_AUTH = "true"; + }; + }; + + # https://github.com/NixOS/nixpkgs/issues/60687 + systemd.services.kube-control-plane-online = { + preStart = pkgs.lib.mkForce ""; + }; + + services.kubernetes = { + package = config.hscloud.kube.package; + # We do not use any nixpkgs predefined roles for k8s. Instead, we enable + # k8s components manually. + roles = []; + addons.dns.enable = false; + caFile = pki.kube.apiserver.ca; + clusterCidr = "10.10.16.0/20"; + + apiserver = rec { + enable = true; + # BUG: should be 0. + insecurePort = 4000; + securePort = config.hscloud.kube.portAPIServerSecure; + advertiseAddress = config.hscloud.base.ipAddr; + + etcd = { + # Only point at our own etcd. + servers = [ "https://${fqdn}:2379" ]; + caFile = pki.etcd.kube.ca; + keyFile = pki.etcd.kube.key; + certFile = pki.etcd.kube.cert; + }; + + tlsCertFile = pki.kube.apiserver.cert; + tlsKeyFile = pki.kube.apiserver.key; + clientCaFile = pki.kube.apiserver.ca; + + kubeletHttps = true; + # Same CA as main APIServer CA. 
+ kubeletClientCaFile = pki.kube.apiserver.ca; + kubeletClientCertFile = pki.kube.apiserver.cert; + kubeletClientKeyFile = pki.kube.apiserver.key; + + serviceAccountKeyFile = pki.kube.serviceaccounts.key; + + allowPrivileged = true; + serviceClusterIpRange = "10.10.12.0/24"; + runtimeConfig = "api/all,authentication.k8s.io/v1beta1"; + authorizationMode = [ + "Node" "RBAC" + ]; + enableAdmissionPlugins = [ + "NamespaceLifecycle" "NodeRestriction" "LimitRanger" "ServiceAccount" + "DefaultStorageClass" "ResourceQuota" "PodSecurityPolicy" + ]; + extraOpts = '' + --apiserver-count=5 \ + --proxy-client-cert-file=${pki.kubeFront.apiserver.cert} \ + --proxy-client-key-file=${pki.kubeFront.apiserver.key} \ + --requestheader-allowed-names= \ + --requestheader-client-ca-file=${pki.kubeFront.apiserver.ca} \ + --requestheader-extra-headers-prefix=X-Remote-Extra- \ + --requestheader-group-headers=X-Remote-Group \ + --requestheader-username-headers=X-Remote-User \ + -v=5 + ''; + }; + + controllerManager = let + top = config.services.kubernetes; + kubeconfig = top.lib.mkKubeConfig "controller-manager" pki.kube.controllermanager.config; + in { + enable = true; + bindAddress = "0.0.0.0"; + insecurePort = 0; + leaderElect = true; + serviceAccountKeyFile = pki.kube.serviceaccounts.key; + rootCaFile = pki.kube.ca; + extraOpts = '' + --service-cluster-ip-range=10.10.12.0/24 \ + --use-service-account-credentials=true \ + --secure-port=${toString cfg.portControllerManagerSecure}\ + --authentication-kubeconfig=${kubeconfig}\ + --authorization-kubeconfig=${kubeconfig}\ + ''; + kubeconfig = pki.kube.controllermanager.config; + }; + + scheduler = let + top = config.services.kubernetes; + # BUG: this should be scheduler + # TODO(q3k): change after big nix change + kubeconfig = top.lib.mkKubeConfig "scheduler" pki.kube.controllermanager.config; + in { + enable = true; + address = "0.0.0.0"; + port = 0; + leaderElect = true; + kubeconfig = pki.kube.scheduler.config; + extraOpts = '' + --secure-port=${toString cfg.portSchedulerSecure}\ + --authentication-kubeconfig=${kubeconfig}\ + --authorization-kubeconfig=${kubeconfig}\ + ''; + }; + }; + }; +} + diff --git a/cluster/machines/modules/kube-dataplane.nix b/cluster/machines/modules/kube-dataplane.nix new file mode 100644 index 00000000..f38ad84d --- /dev/null +++ b/cluster/machines/modules/kube-dataplane.nix @@ -0,0 +1,96 @@ +{ config, pkgs, lib, machines, ... }: + +with lib; + +let + # Pin for kubelet and proxy. + k8spkgs = import (fetchGit { + # Now at 1.16.5 + name = "nixos-unstable-2020-01-22"; + url = https://github.com/nixos/nixpkgs-channels/; + rev = "a96ed5d70427bdc2fbb9e805784e1b9621157a98"; + }) {}; + + cfg = config.hscloud.kube.data; + + # All control plane nodes. + controlNodes = let + list = mapAttrsToList (_: v: v) machines; + filtered = filter (m: (m.config ? hscloud.kube.control) && (m.config.hscloud.kube.control.enable)) list; + sorted = sort (a: b: a.config.hscloud.base.fqdn < b.config.hscloud.base.fqdn) filtered; + in sorted; + + fqdn = config.hscloud.base.fqdn; + + pki = config.hscloud.kube.pki; + +in { + options.hscloud.kube.data = { + enable = mkEnableOption "kubernetes data plane"; + podNet = mkOption { + type = types.str; + description = "Subnet in which this node will run pods. Must be exclusive with podNets of other nodes."; + }; + }; + + # Disable kubelet service and bring in our own override. + # Also nuke flannel from the orbit. 
+ disabledModules = [ + "services/cluster/kubernetes/kubelet.nix" + "services/cluster/kubernetes/flannel.nix" + ]; + + imports = [ + ./kubelet.nix + ./kube-common.nix + ]; + + + config = mkIf cfg.enable { + # If we're not running the control plane, render a hostsfile that points at + # all other control plane nodes. Otherwise, the control plane module will + # make this hostsfile contain the node itself. + networking.extraHosts = mkIf (!config.hscloud.kube.control.enable) (concatStringsSep "\n" (map + (n: '' + ${n.config.hscloud.base.mgmtIf} ${n.config.hscloud.base.fqdn} + '') + controlNodes)); + + # this seems to depend on flannel + # TODO(q3k): file issue + systemd.services.kubelet-online = { + script = pkgs.lib.mkForce "sleep 1"; + }; + + services.kubernetes = { + # The kubelet wants to mkfs.ext4 when mounting pvcs. + path = [ pkgs.e2fsprogs ]; + + proxy = { + enable = true; + kubeconfig = pki.kube.proxy.config; + extraOpts = '' + --hostname-override=${fqdn}\ + --proxy-mode=iptables + ''; + }; + + kubelet = { + enable = true; + unschedulable = false; + hostname = fqdn; + tlsCertFile = pki.kube.kubelet.cert; + tlsKeyFile = pki.kube.kubelet.key; + clientCaFile = pki.kube.kubelet.ca; + nodeIp = config.hscloud.base.ipAddr; + networkPlugin = "cni"; + clusterDns = "10.10.12.254"; + kubeconfig = pki.kube.kubelet.config; + extraOpts = '' + --read-only-port=0 + ''; + package = config.hscloud.kube.packageKubelet; + }; + }; + }; +} diff --git a/cluster/nix/modules/kubelet.nix b/cluster/machines/modules/kubelet.nix similarity index 100% rename from cluster/nix/modules/kubelet.nix rename to cluster/machines/modules/kubelet.nix diff --git a/cluster/nix/defs-cluster-k0.nix b/cluster/nix/defs-cluster-k0.nix deleted file mode 100644 index cd0fcacf..00000000 --- a/cluster/nix/defs-cluster-k0.nix +++ /dev/null @@ -1,130 +0,0 @@ -machineName: - -let - machines = (import ./defs-machines.nix); -in rec { - domain = ".hswaw.net"; - k8sapi = "k0.hswaw.net"; - acmeEmail = "q3k@hackerspace.pl"; - - fqdn = machineName + domain; - machine = (builtins.head (builtins.filter (n: n.fqdn == fqdn) machines)); - otherMachines = (builtins.filter (n: n.fqdn != fqdn) machines); - machinesByName = builtins.listToAttrs (map (m: { name = m.name; value = m; }) machines); - inherit machines; - - # Ceph cluster to run systemd modules for. - cephCluster = { - fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354"; - name = "k0"; - - # Map from node name to mon configuration (currently always empty). - # - # Each mon also runs a mgr daemon (which is a leader-elected kitchen - # sink^W^Whousekeeping service hanging off of a mon cluster). - # - # Consult the Ceph documentation - # (https://docs.ceph.com/en/pacific/rados/operations/add-or-rm-mons/) on - # how to actually carry out mon-related maintenance operations. - mons = { - bc01n02 = {}; - }; - - # Map from node name to list of disks on node. - # Each disk is: - # id: OSD numerical ID, eg. 0 for osd.0. You get this after running - # ceph-lvm volume create. - # path: Filesystem path for disk backing drive. This should be something - # in /dev/disk/by-id for safety. This is only used to gate OSD - # daemon startup by disk presence. - # uuid: OSD uuid/fsid. You get this after running ceph-lvm volume create. - # - # Quick guide how to set up a new OSD (but please refer to the Ceph manual): - # 0. Copy /var/lib/ceph/bootstrap-osd/k0.keyring from another OSD node to - # the new OSD node, if this is a new node. Remember to chown ceph:ceph - # chmod 0600! - # 1. 
nix-shell -p ceph lvm2 cryptsetup (if on a node that's not yet an OSD) - # 2. ceph-volume --cluster k0 lvm create --bluestore --data /dev/sdX --no-systemd --dmcrypt - # 3. The above will mount a tmpfs on /var/lib/ceph/osd/k0-X. X is the new - # osd id. A file named fsid inside this directory is the new OSD fsid/uuid. - # 4. Configure osds below with the above information, redeploy node from nix. - osds = { - dcr01s22 = [ - { id = 0; path = "/dev/disk/by-id/scsi-35000c500850293e3"; uuid = "314034c5-474c-4d0d-ba41-36a881c52560";} - { id = 1; path = "/dev/disk/by-id/scsi-35000c500850312cb"; uuid = "a7f1baa0-0fc3-4ab1-9895-67abdc29de03";} - { id = 2; path = "/dev/disk/by-id/scsi-35000c5008508e3ef"; uuid = "11ac8316-6a87-48a7-a0c7-74c3cef6c2fa";} - { id = 3; path = "/dev/disk/by-id/scsi-35000c5008508e23f"; uuid = "c6b838d1-b08c-4788-936c-293041ed2d4d";} - ]; - dcr01s24 = [ - { id = 4; path = "/dev/disk/by-id/scsi-35000c5008509199b"; uuid = "a2b4663d-bd8f-49b3-b0b0-195c56ba252f";} - { id = 5; path = "/dev/disk/by-id/scsi-35000c50085046abf"; uuid = "a2242989-ccce-4367-8813-519b64b5afdb";} - { id = 6; path = "/dev/disk/by-id/scsi-35000c5008502929b"; uuid = "7deac89c-22dd-4c2b-b3cc-43ff7f990fd6";} - { id = 7; path = "/dev/disk/by-id/scsi-35000c5008502a323"; uuid = "e305ebb3-9cac-44d2-9f1d-bbb72c8ab51f";} - ]; - }; - }; - - pki = rec { - make = (radix: name: rec { - ca = ./../certs + "/ca-${radix}.crt"; - cert = ./../certs + "/${radix}-${name}.cert"; - key = ./../secrets/plain + "/${radix}-${name}.key"; - - json = (builtins.toJSON { - ca = (builtins.toString ca); - cert = (builtins.toString cert); - key = (builtins.toString key); - }); - }); - - etcdPeer = (make "etcdpeer" fqdn); - - etcd = { - server = (make "etcd" fqdn); - kube = (make "etcd" "kube"); - }; - - makeKube = (name: (make "kube" name) // { - config = { - server = "https://${k8sapi}:${toString ports.k8sAPIServerSecure}"; - certFile = (make "kube" name).cert; - keyFile = (make "kube" name).key; - }; - }); - - kube = rec { - ca = apiserver.ca; - - # Used to identify apiserver. - apiserver = (makeKube "apiserver"); - - # Used to identify controller-manager. - controllermanager = (makeKube "controllermanager"); - - # Used to identify scheduler. - scheduler = (makeKube "scheduler"); - - # Used to identify kube-proxy. - proxy = (makeKube "proxy"); - - # Used to identify kubelet. - kubelet = (makeKube "kubelet-${fqdn}"); - - # Used to encrypt service accounts. 
- serviceaccounts = (makeKube "serviceaccounts"); - }; - - kubeFront = { - apiserver = (make "kubefront" "apiserver"); - }; - }; - - ports = { - k8sAPIServerPlain = 4000; - k8sAPIServerSecure = 4001; - k8sControllerManagerPlain = 0; # would be 4002; do not serve plain http - k8sControllerManagerSecure = 4003; - k8sSchedulerPlain = 0; # would be 4004; do not serve plain http - k8sSchedulerSecure = 4005; - }; -} diff --git a/cluster/nix/defs-machines.nix b/cluster/nix/defs-machines.nix deleted file mode 100644 index da9150c1..00000000 --- a/cluster/nix/defs-machines.nix +++ /dev/null @@ -1,58 +0,0 @@ -[ - rec { - name = "bc01n01"; - threads = 16; - fqdn = "${name}.hswaw.net"; - ipAddr = "185.236.240.35"; - ipAddrBits = 28; - gw = "185.236.240.33"; - podNet = "10.10.16.0/24"; - diskBoot = "/dev/disk/by-id/scsi-360024e8078a9060023b1043107388af5"; - fsRoot = "/dev/disk/by-uuid/518ecac1-00ea-4ef0-9418-9eca6ce6d918"; - mgmtIf = "eno1"; - stateVersion = "18.09"; - } - rec { - name = "bc01n02"; - threads = 16; - fqdn = "${name}.hswaw.net"; - ipAddr = "185.236.240.36"; - ipAddrBits = 28; - gw = "185.236.240.33"; - podNet = "10.10.17.0/24"; - diskBoot = "/dev/disk/by-id/scsi-360024e8078b0250023b10f8706d3c99e"; - fsRoot = "/dev/disk/by-uuid/2d45c87b-029b-463e-a7cb-afd5a3089327"; - mgmtIf = "eno1"; - stateVersion = "18.09"; - } - # Tombstone - bc01n03 suffered from hardware failure on 2021/01/10. - # rec { - # name = "bc01n03"; - # } - rec { - name = "dcr01s22"; - threads = 48; - fqdn = "${name}.hswaw.net"; - ipAddr = "185.236.240.39"; - ipAddrBits = 28; - gw = "185.236.240.33"; - podNet = "10.10.19.0/24"; - diskBoot = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNX1M604518E"; - fsRoot = "/dev/disk/by-uuid/b4149083-49fe-4951-a143-aff4cedaf33a"; - mgmtIf = "enp130s0f0"; - stateVersion = "19.09"; - } - rec { - name = "dcr01s24"; - threads = 48; - fqdn = "${name}.hswaw.net"; - ipAddr = "185.236.240.40"; - ipAddrBits = 28; - gw = "185.236.240.33"; - podNet = "10.10.20.0/24"; - diskBoot = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNF0M717009H"; - fsRoot = "/dev/disk/by-uuid/fc5c6456-5bbd-4b9e-a93e-7f9073ffe09a"; - mgmtIf = "enp130s0f0"; - stateVersion = "19.09"; - } -] diff --git a/cluster/nix/modules/base.nix b/cluster/nix/modules/base.nix deleted file mode 100644 index 29f2072a..00000000 --- a/cluster/nix/modules/base.nix +++ /dev/null @@ -1,77 +0,0 @@ -{ config, pkgs, lib, ... 
}: - -with (( import ../defs-cluster-k0.nix ) config.networking.hostName); - -rec { - system.stateVersion = machine.stateVersion; - nix.maxJobs = machine.threads; - - boot.loader.grub.enable = true; - boot.loader.grub.version = 2; - boot.loader.grub.device = machine.diskBoot; - - fileSystems."/" = - { device = machine.fsRoot; - fsType = "ext4"; - }; - swapDevices = [ ]; - - boot.kernelPackages = pkgs.linuxPackages_latest; - boot.kernelParams = [ "boot.shell_on_fail" ]; - boot.kernel.sysctl."net.ipv4.conf.all.rp_filter" = "0"; - boot.kernel.sysctl."net.ipv4.conf.default.rp_filter" = "0"; - boot.initrd.availableKernelModules = [ "uhci_hcd" "ehci_pci" "megaraid_sas" "usb_storage" "usbhid" "sd_mod" "sr_mod" ]; - boot.kernelModules = [ "kvm-intel" ]; - boot.extraModulePackages = []; - hardware.enableRedistributableFirmware = true; - - time.timeZone = "Europe/Warsaw"; - - environment.systemPackages = with pkgs; [ - wget vim htop tcpdump - rxvt_unicode.terminfo - ]; - programs.mtr.enable = true; - - networking.useDHCP = false; - networking.interfaces."${machine.mgmtIf}" = { - ipv4.addresses = [ - { - address = machine.ipAddr; - prefixLength = machine.ipAddrBits; - } - ]; - }; - networking.defaultGateway = machine.gw; - networking.nameservers = ["185.236.240.1"]; - - # Instead of using nixpkgs from the root/nixos channel, use pkgs pin from this file. - nix.nixPath = [ "nixpkgs=${pkgs.path}" "nixos-config=/etc/nixos/configuration.nix" ]; - - # Otherwise fetchGit nixpkgs pin fails. - systemd.services.nixos-upgrade.path = [ pkgs.git ]; - - # Use Chrony instead of systemd-timesyncd - services.chrony.enable = true; - - # Symlink lvm into /sbin/lvm on activation. This is needed by Rook OSD - # instances running on Kubernetes. - # See: https://github.com/rook/rook/commit/f3c4975e353e3ce3599c958ec6d2cae8ee8f6f61 - system.activationScripts.sbinlvm = - '' - mkdir -m 0755 -p /sbin - ln -sfn ${pkgs.lvm2.bin}/bin/lvm /sbin/lvm - ''; - - # Enable the OpenSSH daemon. 
- services.openssh.enable = true; - users.users.root.openssh.authorizedKeys.keys = [ - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD4VJXAXEHEXZk2dxNwehneuJcEGkfXG/U7z4fO79vDVIENdedtXQUyLyhZJc5RTEfHhQj66FwIqzl7mzBHd9x9PuDp6QAYXrkVNMj48s6JXqZqBvF6H/weRqFMf4a2TZv+hG8D0kpvmLheCwWAVRls7Jofnp/My+yDd57GMdsbG/yFEf6WPMiOnA7hxdSJSVihCsCSw2p8PD4GhBe8CVt7xIuinhutjm9zYBjV78NT8acjDUfJh0B1ODTjs7nuW1CC4jybSe2j/OU3Yczj4AxRxBNWuFxUq+jBo9BfpbKLh+Tt7re+zBkaicM77KM/oV6943JJxgHNBBOsv9scZE7 q3k@amnesia" - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIG599UildOrAq+LIOQjKqtGMwjgjIxozI1jtQQRKHtCP q3k@mimeomia" - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQb3YQoiYFZLKwvHYKbu1bMqzNeDCAszQhAe1+QI5SLDOotclyY/vFmOReZOsmyMFl71G2d7d+FbYNusUnNNjTxRYQ021tVc+RkMdLJaORRURmQfEFEKbai6QSFTwErXzuoIzyEPK0lbsQuGgqT9WaVnRzHJ2Q/4+qQbxAS34PuR5NqEkmn4G6LMo3OyJ5mwPkCj9lsqz4BcxRaMWFO3mNcwGDfSW+sqgc3E8N6LKrTpZq3ke7xacpQmcG5DU9VO+2QVPdltl9jWbs3gXjmF92YRNOuKPVfAOZBBsp8JOznfx8s9wDgs7RwPmDpjIAJEyoABqW5hlXfqRbTnfnMvuR informatic@InformaticPC" - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDGkMgEVwQM8yeuFUYL2TwlJIq9yUNBmHnwce46zeL2PK2CkMz7sxT/om7sp/K5XDiqeD05Nioe+Dr3drP6B8uI33S5NgxPIfaqQsRS+CBEgk6cqFlcdlKETU/DT+/WsdoO173n7mgGeafPInEuQuGDUID0Fl099kIxtqfAhdeZFMM6/szAZEZsElLJ8K6dp1Ni/jmnXCZhjivZH3AZUlnqrmtDG7FY1bgcOfDXAal45LItughGPtrdiigXe9DK2fW3+9DBZZduh5DMJTNlphAZ+nfSrbyHVKUg6WsgMSprur4KdU47q1QwzqqvEj75JcdP1jOWoZi4F6VJDte9Wb9lhD1jGgjxY9O6Gs4CH35bx15W7CN9hgNa0C8NbPJe/fZYIeMZmJ1m7O2xmnYwP8j+t7RNJWu7Pa3Em4mOEXvhBF07Zfq+Ye/4SluoRgADy5eII2x5fFo5EBhInxK0/X8wF6XZvysalVifoCh7T4Edejoi91oAxFgYAxbboXGlod0eEHIi2hla8SM9+IBHOChmgawKBYp2kzAJyAmHNBF+Pah9G4arVCj/axp/SJZDZbJQoI7UT/fJzEtvlb5RWrHXRq+y6IvjpUq4pzpDWW04+9UMqEEXRmhWOakHfEVM9rN8h3aJBflLUBBnh0Z/hVsKNh8bCRHaKtah8TrD9i+wMw== patryk.jakuszew@gmail.com" - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC33naG1ptCvUcRWX9cj9wXM1nW1lyQC4SvMJzWlr9aMD96O8hQ2JMkuIUgUJvorAY02QRplQ2BuoVoVkdkzwjMyi1bL3OdgcKo7Z1yByClGTTocqNJYY0lcUb6EJH8+6e6F9ydrQlSxNzL1uCaA7phZr+yPcmAmWbSfioXn98yXNkE0emHxzJv/nypJY56sDCMC2IXDRd8L2goDtPwgPEW7bWfAQdIFMJ75xOidZOTxJ8eqyXLw/kxY5UlyX66jdoYz1sE5XUHuoQl1AOG9UdlMo0aMhUvP4pX5l7r7EnA9OttKMFB3oWqkVK/R6ynZ52YNOU5BZ9V+Ppaj34W0xNu+p0mbHcCtXYCTrf/OU0hcZDbDaNTjs6Vtcm2wYw9iAKX7Tex+eOMwUwlrlcyPNRV5BTot7lGNYfauHCSIuWJKN4NhCLR/NtVNh4/94eKkPTwJsY6XqDcS7q49wPAs4DAH7BJgsbHPOqygVHrY0YYEfz3Pj0HTxJHQMCP/hQX4fXEGt0BjgoVJbXPAQtPyeg0JuxiUg+b4CgVVfQ6R060MlM1BZzhmh+FY5MJH6nJppS0aHYCvSg8Z68NUlCPKy0jpcyfuAIWQWwSGG1O010WShQG2ELsvNdg5/4HVdCGNl5mmoom6JOd72FOZyQlHDFfeQUQRn9HOeCq/c51rK99SQ== bartek@IHM" - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICTR292kx/2CNuWYIsZ6gykQ036aBGrmheIuZa6S1D2x implr@thonk" - ]; - -} diff --git a/cluster/nix/modules/kubernetes.nix b/cluster/nix/modules/kubernetes.nix deleted file mode 100644 index df82effc..00000000 --- a/cluster/nix/modules/kubernetes.nix +++ /dev/null @@ -1,195 +0,0 @@ -{ config, pkgs, lib, ... }: - -with (( import ../defs-cluster-k0.nix ) config.networking.hostName); -let - # Pin for k8s packages. This is so that upagrading the system will not upgrade the k8s control or data planes. - k8spkgs = import (fetchGit { - # Now at 1.16.5 - name = "nixos-unstable-2020-01-22"; - url = https://github.com/nixos/nixpkgs-channels/; - rev = "a96ed5d70427bdc2fbb9e805784e1b9621157a98"; - }) {}; - # Pin for kubelet - k8spkgsKubelet = import (fetchGit { - # Now at 1.16.5 - name = "nixos-unstable-2020-01-22"; - url = https://github.com/nixos/nixpkgs-channels/; - rev = "a96ed5d70427bdc2fbb9e805784e1b9621157a98"; - }) {}; - -in rec { - # Disable kubelet service and bring in our own override. - # Also nuke flannel from the orbit. 
- disabledModules = [ - "services/cluster/kubernetes/kubelet.nix" - "services/cluster/kubernetes/flannel.nix" - ]; - - imports = - [ - ./kubelet.nix - ]; - - networking.firewall.enable = false; - - # Point k8s apiserver address at ourselves, as every machine runs an apiserver with this cert name. - networking.extraHosts = '' - 127.0.0.1 ${k8sapi} - ''; - - services.etcd = rec { - enable = true; - name = fqdn; - listenClientUrls = ["https://0.0.0.0:2379"]; - advertiseClientUrls = ["https://${fqdn}:2379"]; - listenPeerUrls = ["https://0.0.0.0:2380"]; - initialAdvertisePeerUrls = ["https://${fqdn}:2380"]; - initialCluster = (map (n: "${n.fqdn}=https://${n.fqdn}:2380") machines); - initialClusterState = "existing"; - - clientCertAuth = true; - trustedCaFile = pki.etcd.server.ca; - certFile = pki.etcd.server.cert; - keyFile = pki.etcd.server.key; - - peerClientCertAuth = true; - peerTrustedCaFile = pki.etcdPeer.ca; - peerCertFile = pki.etcdPeer.cert; - peerKeyFile = pki.etcdPeer.key; - - extraConf = { - PEER_CLIENT_CERT_AUTH = "true"; - }; - }; - - services.kubernetes = { - # Pin to specific k8s package. - package = k8spkgs.kubernetes; - roles = []; # We do not use any nixpkgs predefined roles for k8s. Instead, - # we enable k8s components manually. - - caFile = pki.kube.apiserver.ca; - clusterCidr = "10.10.16.0/20"; - - path = [ pkgs.e2fsprogs ]; # kubelet wants to mkfs.ext4 when mounting pvcs - - addons.dns.enable = false; - - apiserver = rec { - enable = true; - insecurePort = ports.k8sAPIServerPlain; - securePort = ports.k8sAPIServerSecure; - advertiseAddress = "${machine.ipAddr}"; - - etcd = { - # https://github.com/kubernetes/kubernetes/issues/72102 - servers = (map (n: "https://${n.fqdn}:2379") ( [ machine ] )); - caFile = pki.etcd.kube.ca; - keyFile = pki.etcd.kube.key; - certFile = pki.etcd.kube.cert; - }; - - tlsCertFile = pki.kube.apiserver.cert; - tlsKeyFile = pki.kube.apiserver.key; - - clientCaFile = pki.kube.apiserver.ca; - - kubeletHttps = true; - kubeletClientCaFile = pki.kube.apiserver.ca; - kubeletClientCertFile = pki.kube.apiserver.cert; - kubeletClientKeyFile = pki.kube.apiserver.key; - - serviceAccountKeyFile = pki.kube.serviceaccounts.key; - - allowPrivileged = true; - serviceClusterIpRange = "10.10.12.0/24"; - runtimeConfig = "api/all,authentication.k8s.io/v1beta1"; - authorizationMode = ["Node" "RBAC"]; - enableAdmissionPlugins = ["NamespaceLifecycle" "NodeRestriction" "LimitRanger" "ServiceAccount" "DefaultStorageClass" "ResourceQuota" "PodSecurityPolicy"]; - extraOpts = '' - --apiserver-count=5 \ - --proxy-client-cert-file=${pki.kubeFront.apiserver.cert} \ - --proxy-client-key-file=${pki.kubeFront.apiserver.key} \ - --requestheader-allowed-names= \ - --requestheader-client-ca-file=${pki.kubeFront.apiserver.ca} \ - --requestheader-extra-headers-prefix=X-Remote-Extra- \ - --requestheader-group-headers=X-Remote-Group \ - --requestheader-username-headers=X-Remote-User \ - -v=5 - ''; - }; - - controllerManager = let - top = config.services.kubernetes; - kubeconfig = top.lib.mkKubeConfig "controller-manager" pki.kube.controllermanager.config; - in { - enable = true; - bindAddress = "0.0.0.0"; - insecurePort = ports.k8sControllerManagerPlain; - leaderElect = true; - serviceAccountKeyFile = pki.kube.serviceaccounts.key; - rootCaFile = pki.kube.ca; - extraOpts = '' - --service-cluster-ip-range=10.10.12.0/24 \ - --use-service-account-credentials=true \ - --secure-port=${toString ports.k8sControllerManagerSecure}\ - --authentication-kubeconfig=${kubeconfig}\ - 
--authorization-kubeconfig=${kubeconfig}\ - ''; - kubeconfig = pki.kube.controllermanager.config; - }; - - scheduler = let - top = config.services.kubernetes; - kubeconfig = top.lib.mkKubeConfig "scheduler" pki.kube.controllermanager.config; - in { - enable = true; - address = "0.0.0.0"; - port = ports.k8sSchedulerPlain; - leaderElect = true; - kubeconfig = pki.kube.scheduler.config; - extraOpts = '' - --secure-port=${toString ports.k8sSchedulerSecure}\ - --authentication-kubeconfig=${kubeconfig}\ - --authorization-kubeconfig=${kubeconfig}\ - ''; - }; - - proxy = { - enable = true; - kubeconfig = pki.kube.proxy.config; - extraOpts = '' - --hostname-override=${fqdn}\ - --proxy-mode=iptables - ''; - }; - - kubelet = { - enable = true; - unschedulable = false; - hostname = fqdn; - tlsCertFile = pki.kube.kubelet.cert; - tlsKeyFile = pki.kube.kubelet.key; - clientCaFile = pki.kube.kubelet.ca; - nodeIp = machine.ipAddr; - networkPlugin = "cni"; - clusterDns = "10.10.12.254"; - kubeconfig = pki.kube.kubelet.config; - extraOpts = '' - --read-only-port=0 - ''; - package = k8spkgsKubelet.kubernetes; - }; - - }; - - # https://github.com/NixOS/nixpkgs/issues/60687 - systemd.services.kube-control-plane-online = { - preStart = pkgs.lib.mkForce ""; - }; - # this seems to depend on flannel - # TODO(q3k): file issue - systemd.services.kubelet-online = { - script = pkgs.lib.mkForce "sleep 1"; - }; -} diff --git a/ops/machines.nix b/ops/machines.nix index 7dd12321..208279ec 100644 --- a/ops/machines.nix +++ b/ops/machines.nix @@ -60,23 +60,6 @@ let sha256 = "0p7df7yzi35kblxr5ks0rxxp9cfh269g88xpj60sdhdjvfnn6cp7"; }) {}; - # Stopgap measure to import //cluster/nix machine definitions into new - # //ops/ infrastructure. - # - # TODO(q3k): inject defs-cluster-k0.nix / defs-machines.nix content via - # nixos options instead of having module definitions loading it themselves, - # deduplicate list of machines below with defs-machines.nix somehow. - clusterMachineConfig = name: [({ config, pkgs, ...}: { - # The hostname is used by //cluster/nix machinery to load the appropriate - # config from defs-machines into defs-cluster-k0. - networking.hostName = name; - imports = [ - ../cluster/nix/modules/base.nix - ../cluster/nix/modules/kubernetes.nix - ../cluster/nix/modules/ceph.nix - ]; - })]; - # mkMachine builds NixOS modules into a NixOS derivation. # It: # 1) injects passthru.hscloud.provision which deploys that configuration @@ -85,7 +68,7 @@ let # of the hscloud readTree object. It will contain whatever nixpkgs # checkout this file has been invoked with, ie. will not be 'mixed in' # with the pkgs argument. - mkMachine = pkgs: paths: pkgs.nixos ({ config, pkgs, ... }: { + mkMachine = machines: pkgs: paths: pkgs.nixos ({ config, pkgs, ... }: { imports = paths; config = let @@ -139,20 +122,32 @@ let # TODO(q3k): this should be named hscloud, but that seems to not work. Debug and rename. 
_module.args.workspace = hscloud; + _module.args.machines = machines; }; }); -in { - "bc01n01.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n01"); - "bc01n02.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n02"); - "dcr01s22.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s22"); - "dcr01s24.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s24"); - "edge01.waw.bgp.wtf" = mkMachine nixpkgsBgpwtf [ - ../bgpwtf/machines/edge01.waw.bgp.wtf.nix - ../bgpwtf/machines/edge01.waw.bgp.wtf-hardware.nix + mkClusterMachine = machines: path: mkMachine machines nixpkgsCluster [ + ../cluster/machines/modules/base.nix + ../cluster/machines/modules/kube-controlplane.nix + ../cluster/machines/modules/kube-dataplane.nix + ../cluster/machines/modules/ceph.nix + path ]; - "customs.hackerspace.pl" = mkMachine pkgs [ - ../hswaw/machines/customs.hackerspace.pl/configuration.nix - ]; -} + machines = self: { + "bc01n01.hswaw.net" = mkClusterMachine self ../cluster/machines/bc01n01.hswaw.net.nix; + "bc01n02.hswaw.net" = mkClusterMachine self ../cluster/machines/bc01n02.hswaw.net.nix; + "dcr01s22.hswaw.net" = mkClusterMachine self ../cluster/machines/dcr01s22.hswaw.net.nix; + "dcr01s24.hswaw.net" = mkClusterMachine self ../cluster/machines/dcr01s24.hswaw.net.nix; + + "edge01.waw.bgp.wtf" = mkMachine self nixpkgsBgpwtf [ + ../bgpwtf/machines/edge01.waw.bgp.wtf.nix + ../bgpwtf/machines/edge01.waw.bgp.wtf-hardware.nix + ]; + + "customs.hackerspace.pl" = mkMachine self pkgs [ + ../hswaw/machines/customs.hackerspace.pl/configuration.nix + ]; + }; + +in pkgs.lib.fix machines