diff --git a/cluster/clustercfg/clustercfg.py b/cluster/clustercfg/clustercfg.py
index 7024f4ad..30b87b82 100644
--- a/cluster/clustercfg/clustercfg.py
+++ b/cluster/clustercfg/clustercfg.py
@@ -38,21 +38,6 @@ sh.setFormatter(formatter)
 logger.addHandler(sh)
 
 
-def pki_config(key, fqdn):
-    machine_name = fqdn.split('.')[0]
-    raw = subprocess.check_output([
-        'nix', 'eval', '--raw', '--impure', '--expr',
-        '( ((import ' + local_root + '/cluster/nix/defs-cluster-k0.nix ) "' + machine_name + '").pki.' + key + '.json )',
-    ])
-    return json.loads(raw)
-
-
-def _file_exists(c, filename):
-    res = c.run('stat "{}"'.format(filename), warn=True, hide=True)
-    return res.exited == 0
-
-
 def configure_k8s(username, ca, cert, key):
     subprocess.check_call([
         'kubectl', 'config',
@@ -150,9 +135,6 @@ def nodestrap(args, nocerts=False):
     r = fabric.Connection('root@{}'.format(fqdn))
 
     if not nocerts:
-        cfg = dict((k, pki_config(k, fqdn)) for k in [
-            'etcdPeer', 'etcd.server', 'etcd.kube'
-        ])
         certs_root = os.path.join(local_root, 'cluster/certs')
 
         # Make etcd peer certificate for node.
@@ -187,7 +169,6 @@ def nodestrap(args, nocerts=False):
 
         # Make kube component certificates.
         kube_components = ['controllermanager', 'scheduler', 'proxy']
-        cfg = dict((k, pki_config('kube.' + k, fqdn)) for k in kube_components)
        for k in kube_components:
             # meh
             if k == 'controllermanager':
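
Aside: pki_config shelled out to `nix eval` against defs-cluster-k0.nix, which this change deletes; the same data now lives in the NixOS module system. A hedged sketch of an equivalent lookup (the attribute path is assumed from the modules introduced below, via the readTree-evaluated machine set from //ops/machines.nix; this is not something the change itself adds):

    # Evaluation sketch, assuming `machines` is the fixed-point attrset
    # produced at the bottom of ops/machines.nix:
    machines."bc01n01.hswaw.net".config.hscloud.kube.pki.etcd.peer
    # => { ca = ...; cert = ...; key = ...; }
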
diff --git a/cluster/machines/bc01n01.hswaw.net.nix b/cluster/machines/bc01n01.hswaw.net.nix
new file mode 100644
index 00000000..defcbca8
--- /dev/null
+++ b/cluster/machines/bc01n01.hswaw.net.nix
@@ -0,0 +1,39 @@
+{ config, pkgs, ... }:
+
+with builtins;
+
+rec {
+  networking.hostName = "bc01n01";
+  # TODO: undefine fqdn and define domain after big nix change
+  hscloud.base.fqdn = "${networking.hostName}.hswaw.net";
+  #networking.domain = "hswaw.net";
+  system.stateVersion = "18.09";
+  nix.maxJobs = 16;
+
+  boot.loader.grub.device = "/dev/disk/by-id/scsi-360024e8078a9060023b1043107388af5";
+  fileSystems."/".device = "/dev/disk/by-uuid/518ecac1-00ea-4ef0-9418-9eca6ce6d918";
+
+  hscloud.base = {
+    mgmtIf = "eno1";
+    ipAddr = "185.236.240.35";
+    ipAddrBits = 28;
+    gw = "185.236.240.33";
+  };
+
+  hscloud.kube.control.enable = true;
+  hscloud.kube.data = {
+    enable = true;
+    podNet = "10.10.16.0/24";
+  };
+
+  hscloud.ceph = {
+    name = "k0";
+    fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354";
+    enable = true;
+  };
+
+  environment.systemPackages = [
+    pkgs.shadow
+  ];
+}
+
diff --git a/cluster/machines/bc01n02.hswaw.net.nix b/cluster/machines/bc01n02.hswaw.net.nix
new file mode 100644
index 00000000..43b61d08
--- /dev/null
+++ b/cluster/machines/bc01n02.hswaw.net.nix
@@ -0,0 +1,36 @@
+{ config, pkgs, ... }:
+
+with builtins;
+
+rec {
+  networking.hostName = "bc01n02";
+  # TODO: undefine fqdn and define domain after big nix change
+  hscloud.base.fqdn = "${networking.hostName}.hswaw.net";
+  #networking.domain = "hswaw.net";
+  system.stateVersion = "18.09";
+  nix.maxJobs = 16;
+
+  boot.loader.grub.device = "/dev/disk/by-id/scsi-360024e8078b0250023b10f8706d3c99e";
+  fileSystems."/".device = "/dev/disk/by-uuid/2d45c87b-029b-463e-a7cb-afd5a3089327";
+
+  hscloud.base = {
+    mgmtIf = "eno1";
+    ipAddr = "185.236.240.36";
+    ipAddrBits = 28;
+    gw = "185.236.240.33";
+  };
+
+  hscloud.kube = {
+    control.enable = true;
+    data.enable = true;
+    data.podNet = "10.10.17.0/24";
+  };
+
+  hscloud.ceph = {
+    name = "k0";
+    fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354";
+
+    control.enable = true;
+  };
+}
+
diff --git a/cluster/machines/dcr01s22.hswaw.net.nix b/cluster/machines/dcr01s22.hswaw.net.nix
new file mode 100644
index 00000000..742a5413
--- /dev/null
+++ b/cluster/machines/dcr01s22.hswaw.net.nix
@@ -0,0 +1,41 @@
+{ config, pkgs, ... }:
+
+with builtins;
+
+rec {
+  networking.hostName = "dcr01s22";
+  # TODO: undefine fqdn and define domain after big nix change
+  hscloud.base.fqdn = "${networking.hostName}.hswaw.net";
+  #networking.domain = "hswaw.net";
+  system.stateVersion = "19.09";
+  nix.maxJobs = 48;
+
+  boot.loader.grub.device = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNX1M604518E";
+  fileSystems."/".device = "/dev/disk/by-uuid/b4149083-49fe-4951-a143-aff4cedaf33a";
+
+  hscloud.base = {
+    mgmtIf = "enp130s0f0";
+    ipAddr = "185.236.240.39";
+    ipAddrBits = 28;
+    gw = "185.236.240.33";
+  };
+
+  hscloud.kube = {
+    control.enable = true;
+    data.enable = true;
+    data.podNet = "10.10.19.0/24";
+  };
+
+  hscloud.ceph = {
+    name = "k0";
+    fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354";
+
+    osd.devices = [
+      { id = 0; path = "/dev/disk/by-id/scsi-35000c500850293e3"; uuid = "314034c5-474c-4d0d-ba41-36a881c52560";}
+      { id = 1; path = "/dev/disk/by-id/scsi-35000c500850312cb"; uuid = "a7f1baa0-0fc3-4ab1-9895-67abdc29de03";}
+      { id = 2; path = "/dev/disk/by-id/scsi-35000c5008508e3ef"; uuid = "11ac8316-6a87-48a7-a0c7-74c3cef6c2fa";}
+      { id = 3; path = "/dev/disk/by-id/scsi-35000c5008508e23f"; uuid = "c6b838d1-b08c-4788-936c-293041ed2d4d";}
+    ];
+  };
+}
+
diff --git a/cluster/machines/dcr01s24.hswaw.net.nix b/cluster/machines/dcr01s24.hswaw.net.nix
new file mode 100644
index 00000000..c3ad18e6
--- /dev/null
+++ b/cluster/machines/dcr01s24.hswaw.net.nix
@@ -0,0 +1,41 @@
+{ config, pkgs, ... }:
+
+with builtins;
+
+rec {
+  networking.hostName = "dcr01s24";
+  # TODO: undefine fqdn and define domain after big nix change
+  hscloud.base.fqdn = "${networking.hostName}.hswaw.net";
+  #networking.domain = "hswaw.net";
+  system.stateVersion = "19.09";
+  nix.maxJobs = 48;
+
+  boot.loader.grub.device = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNF0M717009H";
+  fileSystems."/".device = "/dev/disk/by-uuid/fc5c6456-5bbd-4b9e-a93e-7f9073ffe09a";
+
+  hscloud.base = {
+    mgmtIf = "enp130s0f0";
+    ipAddr = "185.236.240.40";
+    ipAddrBits = 28;
+    gw = "185.236.240.33";
+  };
+
+  hscloud.kube = {
+    control.enable = true;
+    data.enable = true;
+    data.podNet = "10.10.20.0/24";
+  };
+
+  hscloud.ceph = {
+    name = "k0";
+    fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354";
+
+    osd.devices = [
+      { id = 4; path = "/dev/disk/by-id/scsi-35000c5008509199b"; uuid = "a2b4663d-bd8f-49b3-b0b0-195c56ba252f";}
+      { id = 5; path = "/dev/disk/by-id/scsi-35000c50085046abf"; uuid = "a2242989-ccce-4367-8813-519b64b5afdb";}
+      { id = 6; path = "/dev/disk/by-id/scsi-35000c5008502929b"; uuid = "7deac89c-22dd-4c2b-b3cc-43ff7f990fd6";}
+      { id = 7; path = "/dev/disk/by-id/scsi-35000c5008502a323"; uuid = "e305ebb3-9cac-44d2-9f1d-bbb72c8ab51f";}
+    ];
+  };
+}
+
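For reference, the pod subnets that the four machine files above carve out of the 10.10.16.0/20 cluster CIDR (set in kube-common.nix further down); 10.10.18.0/24 is absent, presumably left unassigned after the bc01n03 tombstone recorded in the deleted defs-machines.nix:

    # Summary sketch only, not part of the change:
    {
      "bc01n01.hswaw.net"  = "10.10.16.0/24";
      "bc01n02.hswaw.net"  = "10.10.17.0/24";
      # 10.10.18.0/24: unused (bc01n03, hardware failure 2021/01/10)
      "dcr01s22.hswaw.net" = "10.10.19.0/24";
      "dcr01s24.hswaw.net" = "10.10.20.0/24";
    }
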
diff --git a/cluster/machines/modules/base.nix b/cluster/machines/modules/base.nix
new file mode 100644
index 00000000..66335ef8
--- /dev/null
+++ b/cluster/machines/modules/base.nix
@@ -0,0 +1,100 @@
+{ config, pkgs, lib, ... }:
+
+with lib;
+
+let
+  cfg = config.hscloud.base;
+
+in {
+  options.hscloud.base = {
+    fqdn = mkOption {
+      type = types.str;
+      description = "Node's FQDN.";
+      default = "${config.networking.hostName}.${config.networking.domain}";
+    };
+    mgmtIf = mkOption {
+      type = types.str;
+      description = "Main network interface. Called mgmtIf for legacy reasons.";
+    };
+    ipAddr = mkOption {
+      type = types.str;
+      description = "IPv4 address on main network interface.";
+    };
+    ipAddrBits = mkOption {
+      type = types.int;
+      description = "IPv4 CIDR mask bits.";
+    };
+    gw = mkOption {
+      type = types.str;
+      description = "IPv4 address of gateway.";
+    };
+  };
+  config = rec {
+    boot.loader.grub.enable = true;
+    boot.loader.grub.version = 2;
+
+    fileSystems."/" =
+      { # device = ""; needs to be defined
+        fsType = "ext4";
+      };
+    swapDevices = [ ];
+
+    boot.kernelPackages = pkgs.linuxPackages_latest;
+    boot.kernelParams = [ "boot.shell_on_fail" ];
+    boot.kernel.sysctl."net.ipv4.conf.all.rp_filter" = "0";
+    boot.kernel.sysctl."net.ipv4.conf.default.rp_filter" = "0";
+    boot.initrd.availableKernelModules = [ "uhci_hcd" "ehci_pci" "megaraid_sas" "usb_storage" "usbhid" "sd_mod" "sr_mod" ];
+    boot.kernelModules = [ "kvm-intel" ];
+    boot.extraModulePackages = [];
+    hardware.enableRedistributableFirmware = true;
+
+    time.timeZone = "Europe/Warsaw";
+
+    environment.systemPackages = with pkgs; [
+      wget vim htop tcpdump
+      rxvt_unicode.terminfo
+    ];
+    programs.mtr.enable = true;
+
+    networking.useDHCP = false;
+    networking.interfaces."${cfg.mgmtIf}" = {
+      ipv4.addresses = [
+        {
+          address = cfg.ipAddr;
+          prefixLength = cfg.ipAddrBits;
+        }
+      ];
+    };
+    networking.defaultGateway = cfg.gw;
+    networking.nameservers = ["185.236.240.1"];
+
+    # Instead of using nixpkgs from the root/nixos channel, use pkgs pin from this file.
+    nix.nixPath = [ "nixpkgs=${pkgs.path}" "nixos-config=/etc/nixos/configuration.nix" ];
+
+    # Otherwise fetchGit nixpkgs pin fails.
+    systemd.services.nixos-upgrade.path = [ pkgs.git ];
+
+    # Use Chrony instead of systemd-timesyncd
+    services.chrony.enable = true;
+
+    # Symlink lvm into /sbin/lvm on activation. This is needed by Rook OSD
+    # instances running on Kubernetes.
+    # See: https://github.com/rook/rook/commit/f3c4975e353e3ce3599c958ec6d2cae8ee8f6f61
+    system.activationScripts.sbinlvm =
+      ''
+        mkdir -m 0755 -p /sbin
+        ln -sfn ${pkgs.lvm2.bin}/bin/lvm /sbin/lvm
+      '';
+
+    # Enable the OpenSSH daemon.
+    services.openssh.enable = true;
+    users.users.root.openssh.authorizedKeys.keys = [
+      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD4VJXAXEHEXZk2dxNwehneuJcEGkfXG/U7z4fO79vDVIENdedtXQUyLyhZJc5RTEfHhQj66FwIqzl7mzBHd9x9PuDp6QAYXrkVNMj48s6JXqZqBvF6H/weRqFMf4a2TZv+hG8D0kpvmLheCwWAVRls7Jofnp/My+yDd57GMdsbG/yFEf6WPMiOnA7hxdSJSVihCsCSw2p8PD4GhBe8CVt7xIuinhutjm9zYBjV78NT8acjDUfJh0B1ODTjs7nuW1CC4jybSe2j/OU3Yczj4AxRxBNWuFxUq+jBo9BfpbKLh+Tt7re+zBkaicM77KM/oV6943JJxgHNBBOsv9scZE7 q3k@amnesia"
+      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIG599UildOrAq+LIOQjKqtGMwjgjIxozI1jtQQRKHtCP q3k@mimeomia"
+      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQb3YQoiYFZLKwvHYKbu1bMqzNeDCAszQhAe1+QI5SLDOotclyY/vFmOReZOsmyMFl71G2d7d+FbYNusUnNNjTxRYQ021tVc+RkMdLJaORRURmQfEFEKbai6QSFTwErXzuoIzyEPK0lbsQuGgqT9WaVnRzHJ2Q/4+qQbxAS34PuR5NqEkmn4G6LMo3OyJ5mwPkCj9lsqz4BcxRaMWFO3mNcwGDfSW+sqgc3E8N6LKrTpZq3ke7xacpQmcG5DU9VO+2QVPdltl9jWbs3gXjmF92YRNOuKPVfAOZBBsp8JOznfx8s9wDgs7RwPmDpjIAJEyoABqW5hlXfqRbTnfnMvuR informatic@InformaticPC"
+      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDGkMgEVwQM8yeuFUYL2TwlJIq9yUNBmHnwce46zeL2PK2CkMz7sxT/om7sp/K5XDiqeD05Nioe+Dr3drP6B8uI33S5NgxPIfaqQsRS+CBEgk6cqFlcdlKETU/DT+/WsdoO173n7mgGeafPInEuQuGDUID0Fl099kIxtqfAhdeZFMM6/szAZEZsElLJ8K6dp1Ni/jmnXCZhjivZH3AZUlnqrmtDG7FY1bgcOfDXAal45LItughGPtrdiigXe9DK2fW3+9DBZZduh5DMJTNlphAZ+nfSrbyHVKUg6WsgMSprur4KdU47q1QwzqqvEj75JcdP1jOWoZi4F6VJDte9Wb9lhD1jGgjxY9O6Gs4CH35bx15W7CN9hgNa0C8NbPJe/fZYIeMZmJ1m7O2xmnYwP8j+t7RNJWu7Pa3Em4mOEXvhBF07Zfq+Ye/4SluoRgADy5eII2x5fFo5EBhInxK0/X8wF6XZvysalVifoCh7T4Edejoi91oAxFgYAxbboXGlod0eEHIi2hla8SM9+IBHOChmgawKBYp2kzAJyAmHNBF+Pah9G4arVCj/axp/SJZDZbJQoI7UT/fJzEtvlb5RWrHXRq+y6IvjpUq4pzpDWW04+9UMqEEXRmhWOakHfEVM9rN8h3aJBflLUBBnh0Z/hVsKNh8bCRHaKtah8TrD9i+wMw== patryk.jakuszew@gmail.com"
+      "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC33naG1ptCvUcRWX9cj9wXM1nW1lyQC4SvMJzWlr9aMD96O8hQ2JMkuIUgUJvorAY02QRplQ2BuoVoVkdkzwjMyi1bL3OdgcKo7Z1yByClGTTocqNJYY0lcUb6EJH8+6e6F9ydrQlSxNzL1uCaA7phZr+yPcmAmWbSfioXn98yXNkE0emHxzJv/nypJY56sDCMC2IXDRd8L2goDtPwgPEW7bWfAQdIFMJ75xOidZOTxJ8eqyXLw/kxY5UlyX66jdoYz1sE5XUHuoQl1AOG9UdlMo0aMhUvP4pX5l7r7EnA9OttKMFB3oWqkVK/R6ynZ52YNOU5BZ9V+Ppaj34W0xNu+p0mbHcCtXYCTrf/OU0hcZDbDaNTjs6Vtcm2wYw9iAKX7Tex+eOMwUwlrlcyPNRV5BTot7lGNYfauHCSIuWJKN4NhCLR/NtVNh4/94eKkPTwJsY6XqDcS7q49wPAs4DAH7BJgsbHPOqygVHrY0YYEfz3Pj0HTxJHQMCP/hQX4fXEGt0BjgoVJbXPAQtPyeg0JuxiUg+b4CgVVfQ6R060MlM1BZzhmh+FY5MJH6nJppS0aHYCvSg8Z68NUlCPKy0jpcyfuAIWQWwSGG1O010WShQG2ELsvNdg5/4HVdCGNl5mmoom6JOd72FOZyQlHDFfeQUQRn9HOeCq/c51rK99SQ== bartek@IHM"
+      "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICTR292kx/2CNuWYIsZ6gykQ036aBGrmheIuZa6S1D2x implr@thonk"
+    ];
+  };
+}
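
On the fqdn TODO above: the machine files override hscloud.base.fqdn because networking.domain is still unset, and the option's default would fail to evaluate without it. A hedged sketch of the intended end state (assumed from the TODO, not part of this change):

    # After the "big nix change", per machine:
    networking.domain = "hswaw.net";
    # hscloud.base.fqdn then falls back to its default,
    # "${config.networking.hostName}.${config.networking.domain}",
    # e.g. "bc01n01.hswaw.net".
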
diff --git a/cluster/nix/modules/ceph.nix b/cluster/machines/modules/ceph.nix
similarity index 59%
rename from cluster/nix/modules/ceph.nix
rename to cluster/machines/modules/ceph.nix
index c258f5bc..4f15bdd3 100644
--- a/cluster/nix/modules/ceph.nix
+++ b/cluster/machines/modules/ceph.nix
@@ -18,23 +18,27 @@
 # don't have hundreds of clusters, none of the above is automated, especially
 # as that kind of automation is quite tricky to do reliably.
 
-{ config, lib, pkgs, ... }:
+{ config, lib, pkgs, machines, ... }:
 
-with builtins;
 with lib;
-with (( import ../defs-cluster-k0.nix ) config.networking.hostName);
-
 let
+  cfg = config.hscloud.ceph;
+
+  allNodes = let
+    list = mapAttrsToList (_: v: v) machines;
+    filtered = filter (m: (m.config ? hscloud.ceph) && (m.config.hscloud.ceph.enable)) list;
+    sorted = sort (a: b: a.config.hscloud.base.fqdn < b.config.hscloud.base.fqdn) filtered;
+  in sorted;
+
+  monNodes = filter (m: m.config.hscloud.ceph.control.enable) allNodes;
+
   machineName = config.networking.hostName;
-  isMon = hasAttr machineName cephCluster.mons;
-  isOsd = hasAttr machineName cephCluster.osds;
-  hasCeph = isMon || isOsd;
 
   # This NixOS Ceph option fragment is present on every machine that runs a
   # mon, and basically tells the NixOS machinery to run mons/mgrs if needed on
   # this machine.
-  cephMonConfig = if isMon then {
+  cephMonConfig = if cfg.control.enable then {
     mon = {
       enable = true;
       daemons = [ machineName ];
@@ -46,10 +50,10 @@ let
   } else {};
 
   # Same as for cephMonConfig, but this time for OSDs.
-  cephOsdConfig = if isOsd then {
+  cephOsdConfig = if (length cfg.osd.devices) > 0 then {
     osd = {
       enable = true;
-      daemons = map (el: "${toString el.id}") cephCluster.osds.${machineName};
+      daemons = map (el: "${toString el.id}") cfg.osd.devices;
     };
     rgw = {
       enable = true;
@@ -57,19 +61,6 @@ let
     };
   } else {};
 
-  # The full option fragment for services.ceph. It contains ceph.conf fragments
-  # (in .global.*) and merges ceph{Mon,Osd}Config.
-  cephConfig = {
-    enable = true;
-    global = {
-      fsid = cephCluster.fsid;
-      clusterName = cephCluster.name;
-
-      # Every Ceph node always attempts to connect to all mons.
-      monHost = concatStringsSep "," (mapAttrsToList (k: _: machinesByName.${k}.ipAddr) cephCluster.mons);
-      monInitialMembers = concatStringsSep "," (builtins.attrNames cephCluster.mons);
-    };
-  } // cephMonConfig // cephOsdConfig;
-
   # Merge ceph-volume lvm activate into ceph-osd-ID services.
   #
@@ -113,7 +104,7 @@ let
             ("+" + (toString (pkgs.writeScript "ceph-osd-${osdId}-activate.sh" ''
               #!/bin/sh
               set -e
-              dir="/var/lib/ceph/osd/${cephCluster.name}-${osdId}/"
+              dir="/var/lib/ceph/osd/${cfg.name}-${osdId}/"
               disk="${el.path}"
               uuid="${osdUuid}"
               if [ -d "$dir" ] && [ -f "$dir"/keyring ]; then
@@ -125,25 +116,78 @@ let
             '')))
-            "${pkgs.ceph.lib}/libexec/ceph/ceph-osd-prestart.sh --id ${osdId} --cluster ${cephCluster.name}"
+            "${pkgs.ceph.lib}/libexec/ceph/ceph-osd-prestart.sh --id ${osdId} --cluster ${cfg.name}"
           ];
         };
         unitConfig = {
           ConditionPathExists = lib.mkForce el.path;
         };
       };
-  }) (if isOsd then cephCluster.osds.${machineName} else []));
+  }) cfg.osd.devices);
 
 in rec {
-  services.ceph = if hasCeph then cephConfig else {};
+  options = {
+    hscloud.ceph = {
+      enable = mkOption {
+        type = types.bool;
+        description = "Enable Ceph storage cluster (native NixOS), not rook.";
+        default = ((length cfg.osd.devices) > 0) || cfg.control.enable;
+      };
+      name = mkOption {
+        type = types.str;
+        description = "Short identifier of cluster.";
+      };
+      fsid = mkOption {
+        type = types.str;
+        description = "UUID of cluster, as generated by first mon.";
+      };
+      control = {
+        enable = mkEnableOption "mon and mgr on this host";
+      };
+      osd = {
+        devices = mkOption {
+          type = types.listOf (types.submodule {
+            options = {
+              id = mkOption {
+                description = "Numeric ID of OSD.";
+                type = types.int;
+              };
+              path = mkOption {
+                description = "Path to underlying block device for OSD storage.";
+                type = types.str;
+              };
+              uuid = mkOption {
+                description = "UUID of generated OSD storage.";
+                type = types.str;
+              };
+            };
+          });
+          default = [];
+        };
+      };
+    };
+  };
+
+  config = mkIf cfg.enable {
+    services.ceph = {
+      enable = cfg.control.enable || (length cfg.osd.devices) > 0;
+      global = {
+        fsid = cfg.fsid;
+        clusterName = cfg.name;
 
-  environment.systemPackages = with pkgs; [
-    ceph cryptsetup smartmontools
-  ];
-
-  systemd.services = osdActivateServices;
-
-  # Hack - the upstream ceph module should generate ${clusterName}.conf instead
-  # of ceph.conf, let's just symlink it.
-  environment.etc."ceph/${cephCluster.name}.conf".source = "/etc/ceph/ceph.conf";
+        # Every Ceph node always attempts to connect to all mons.
+        monHost = concatStringsSep "," (map (n: n.config.hscloud.base.ipAddr) monNodes);
+        monInitialMembers = concatStringsSep "," (map (n: n.config.networking.hostName) monNodes);
+      };
+    } // cephMonConfig // cephOsdConfig;
+
+    environment.systemPackages = with pkgs; [
+      ceph cryptsetup smartmontools
+    ];
+
+    systemd.services = osdActivateServices;
+
+    # Hack - the upstream ceph module should generate ${clusterName}.conf instead
+    # of ceph.conf, let's just symlink it.
+    environment.etc."ceph/${cfg.name}.conf".source = "/etc/ceph/ceph.conf";
+  };
 }
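
Worked example of the new mon discovery: among the machine files in this change only bc01n02 sets hscloud.ceph.control.enable, so on every Ceph node the generated ceph.conf globals evaluate to:

    # Evaluation sketch for the current machine set:
    monHost = "185.236.240.36";      # bc01n02's hscloud.base.ipAddr
    monInitialMembers = "bc01n02";
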
diff --git a/cluster/nix/modules/containerd.toml b/cluster/machines/modules/containerd.toml
similarity index 100%
rename from cluster/nix/modules/containerd.toml
rename to cluster/machines/modules/containerd.toml
diff --git a/cluster/machines/modules/kube-common.nix b/cluster/machines/modules/kube-common.nix
new file mode 100644
index 00000000..6707efaf
--- /dev/null
+++ b/cluster/machines/modules/kube-common.nix
@@ -0,0 +1,94 @@
+{ config, pkgs, lib, machines, ... }:
+
+with lib;
+
+let
+  cfg = config.hscloud.kube;
+  fqdn = config.hscloud.base.fqdn;
+
+in {
+  options.hscloud.kube = {
+    package = mkOption {
+      description = "Kubernetes package to use for everything but kubelet.";
+      type = types.package;
+      default = (import (fetchGit {
+        # Now at 1.16.5
+        name = "nixos-unstable-2020-01-22";
+        url = https://github.com/nixos/nixpkgs-channels/;
+        rev = "a96ed5d70427bdc2fbb9e805784e1b9621157a98";
+      }) {}).kubernetes;
+      defaultText = "pkgs.kubernetes";
+    };
+    packageKubelet = mkOption {
+      description = "Kubernetes package to use for kubelet.";
+      type = types.package;
+      default = cfg.package;
+      defaultText = "pkgs.kubernetes";
+    };
+    portAPIServerSecure = mkOption {
+      type = types.int;
+      description = "Port at which k8s apiserver will listen.";
+      default = 4001;
+    };
+    pki = let
+      mk = (radix: name: rec {
+        ca = ./../../certs + "/ca-${radix}.crt";
+        cert = ./../../certs + "/${radix}-${name}.cert";
+        key = ./../../secrets/plain + "/${radix}-${name}.key";
+      });
+      mkKube = (name: (mk "kube" name) // {
+        config = {
+          server = "https://k0.hswaw.net:${toString cfg.portAPIServerSecure}";
+          certFile = (mk "kube" name).cert;
+          keyFile = (mk "kube" name).key;
+        };
+      });
+    in mkOption {
+      type = types.attrs;
+      default = {
+        kube = rec {
+          ca = apiserver.ca;
+
+          # Used to identify apiserver.
+          apiserver = mkKube "apiserver";
+
+          # Used to identify controller-manager.
+          controllermanager = mkKube "controllermanager";
+
+          # Used to identify scheduler.
+          scheduler = mkKube "scheduler";
+
+          # Used to encrypt service accounts.
+          serviceaccounts = mkKube "serviceaccounts";
+
+          # Used to identify kube-proxy.
+          proxy = mkKube "proxy";
+
+          # Used to identify kubelet.
+          kubelet = mkKube "kubelet-${fqdn}";
+        };
+
+        kubeFront = {
+          apiserver = mk "kubefront" "apiserver";
+        };
+
+        etcd = {
+          peer = mk "etcdpeer" fqdn;
+          server = mk "etcd" fqdn;
+          kube = mk "etcd" "kube";
+        };
+      };
+    };
+  };
+
+  config = {
+    services.kubernetes = {
+      # We do not use any nixpkgs predefined roles for k8s. Instead, we enable
+      # k8s components manually.
+      roles = [];
+      caFile = cfg.pki.kube.apiserver.ca;
+      clusterCidr = "10.10.16.0/20";
+      addons.dns.enable = false;
+    };
+  };
+}
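
Worked example of the mk/mkKube helpers above: for a node whose fqdn is "bc01n01.hswaw.net", pki.etcd.peer evaluates to paths relative to this module's directory:

    # Evaluation sketch:
    {
      ca   = ./../../certs/ca-etcdpeer.crt;
      cert = ./../../certs/etcdpeer-bc01n01.hswaw.net.cert;
      key  = ./../../secrets/plain/etcdpeer-bc01n01.hswaw.net.key;
    }
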
diff --git a/cluster/machines/modules/kube-controlplane.nix b/cluster/machines/modules/kube-controlplane.nix
new file mode 100644
index 00000000..8efda584
--- /dev/null
+++ b/cluster/machines/modules/kube-controlplane.nix
@@ -0,0 +1,178 @@
+{ config, pkgs, lib, machines, ... }:
+
+with lib;
+
+let
+  cfg = config.hscloud.kube.control;
+
+  # All control plane nodes.
+  allNodes = let
+    list = mapAttrsToList (_: v: v) machines;
+    filtered = filter (m: (m.config ? hscloud.kube.control) && (m.config.hscloud.kube.control.enable)) list;
+    sorted = sort (a: b: a.config.hscloud.base.fqdn < b.config.hscloud.base.fqdn) filtered;
+  in sorted;
+
+  # All control plane nodes that aren't the node being evaluated.
+  otherNodes = (filter (m: m.config.networking.hostName != config.networking.hostName) allNodes);
+
+  fqdn = config.hscloud.base.fqdn;
+
+  pki = config.hscloud.kube.pki;
+
+in {
+  imports = [
+    ./kube-common.nix
+  ];
+
+  options.hscloud.kube.control = {
+    enable = mkEnableOption "kubernetes control plane";
+    portControllerManagerSecure = mkOption {
+      type = types.int;
+      description = "Port at which k8s controller-manager will listen.";
+      default = 4003;
+    };
+    portSchedulerSecure = mkOption {
+      type = types.int;
+      description = "Port at which k8s scheduler will listen.";
+      default = 4005;
+    };
+  };
+
+  config = mkIf cfg.enable {
+    networking.firewall.enable = false;
+
+    # Point k8s apiserver address at ourselves, as we _are_ the apiserver.
+    networking.extraHosts = ''
+      127.0.0.1 k0.hswaw.net
+    '';
+
+    services.etcd = rec {
+      enable = true;
+      name = fqdn;
+      listenClientUrls = ["https://0.0.0.0:2379"];
+      advertiseClientUrls = ["https://${fqdn}:2379"];
+      listenPeerUrls = ["https://0.0.0.0:2380"];
+      initialAdvertisePeerUrls = ["https://${fqdn}:2380"];
+      initialCluster = (map (n: "${n.config.hscloud.base.fqdn}=https://${n.config.hscloud.base.fqdn}:2380") allNodes);
+      initialClusterState = "existing";
+
+      clientCertAuth = true;
+      trustedCaFile = pki.etcd.server.ca;
+      certFile = pki.etcd.server.cert;
+      keyFile = pki.etcd.server.key;
+
+      peerClientCertAuth = true;
+      peerTrustedCaFile = pki.etcd.peer.ca;
+      peerCertFile = pki.etcd.peer.cert;
+      peerKeyFile = pki.etcd.peer.key;
+
+      extraConf = {
+        PEER_CLIENT_CERT_AUTH = "true";
+      };
+    };
+
+    # https://github.com/NixOS/nixpkgs/issues/60687
+    systemd.services.kube-control-plane-online = {
+      preStart = pkgs.lib.mkForce "";
+    };
+
+    services.kubernetes = {
+      package = config.hscloud.kube.package;
+      # We do not use any nixpkgs predefined roles for k8s. Instead, we enable
+      # k8s components manually.
+      roles = [];
+      addons.dns.enable = false;
+      caFile = pki.kube.apiserver.ca;
+      clusterCidr = "10.10.16.0/20";
+
+      apiserver = rec {
+        enable = true;
+        # BUG: should be 0.
+        insecurePort = 4000;
+        securePort = config.hscloud.kube.portAPIServerSecure;
+        advertiseAddress = config.hscloud.base.ipAddr;
+
+        etcd = {
+          # Only point at our own etcd.
+          servers = [ "https://${fqdn}:2379" ];
+          caFile = pki.etcd.kube.ca;
+          keyFile = pki.etcd.kube.key;
+          certFile = pki.etcd.kube.cert;
+        };
+
+        tlsCertFile = pki.kube.apiserver.cert;
+        tlsKeyFile = pki.kube.apiserver.key;
+        clientCaFile = pki.kube.apiserver.ca;
+
+        kubeletHttps = true;
+        # Same CA as main APIServer CA.
+        kubeletClientCaFile = pki.kube.apiserver.ca;
+        kubeletClientCertFile = pki.kube.apiserver.cert;
+        kubeletClientKeyFile = pki.kube.apiserver.key;
+
+        serviceAccountKeyFile = pki.kube.serviceaccounts.key;
+
+        allowPrivileged = true;
+        serviceClusterIpRange = "10.10.12.0/24";
+        runtimeConfig = "api/all,authentication.k8s.io/v1beta1";
+        authorizationMode = [
+          "Node" "RBAC"
+        ];
+        enableAdmissionPlugins = [
+          "NamespaceLifecycle" "NodeRestriction" "LimitRanger" "ServiceAccount"
+          "DefaultStorageClass" "ResourceQuota" "PodSecurityPolicy"
+        ];
+        extraOpts = ''
+          --apiserver-count=5 \
+          --proxy-client-cert-file=${pki.kubeFront.apiserver.cert} \
+          --proxy-client-key-file=${pki.kubeFront.apiserver.key} \
+          --requestheader-allowed-names= \
+          --requestheader-client-ca-file=${pki.kubeFront.apiserver.ca} \
+          --requestheader-extra-headers-prefix=X-Remote-Extra- \
+          --requestheader-group-headers=X-Remote-Group \
+          --requestheader-username-headers=X-Remote-User \
+          -v=5
+        '';
+      };
+
+      controllerManager = let
+        top = config.services.kubernetes;
+        kubeconfig = top.lib.mkKubeConfig "controller-manager" pki.kube.controllermanager.config;
+      in {
+        enable = true;
+        bindAddress = "0.0.0.0";
+        insecurePort = 0;
+        leaderElect = true;
+        serviceAccountKeyFile = pki.kube.serviceaccounts.key;
+        rootCaFile = pki.kube.ca;
+        extraOpts = ''
+          --service-cluster-ip-range=10.10.12.0/24 \
+          --use-service-account-credentials=true \
+          --secure-port=${toString cfg.portControllerManagerSecure}\
+          --authentication-kubeconfig=${kubeconfig}\
+          --authorization-kubeconfig=${kubeconfig}\
+        '';
+        kubeconfig = pki.kube.controllermanager.config;
+      };
+
+      scheduler = let
+        top = config.services.kubernetes;
+        # BUG: this should be scheduler
+        # TODO(q3k): change after big nix change
+        kubeconfig = top.lib.mkKubeConfig "scheduler" pki.kube.controllermanager.config;
+      in {
+        enable = true;
+        address = "0.0.0.0";
+        port = 0;
+        leaderElect = true;
+        kubeconfig = pki.kube.scheduler.config;
+        extraOpts = ''
+          --secure-port=${toString cfg.portSchedulerSecure}\
+          --authentication-kubeconfig=${kubeconfig}\
+          --authorization-kubeconfig=${kubeconfig}\
+        '';
+      };
+    };
+  };
+}
+
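Worked example: with all four machine files above enabling hscloud.kube.control, allNodes sorts to the four FQDNs and every node's etcd joins with:

    # Evaluation sketch of services.etcd.initialCluster:
    [
      "bc01n01.hswaw.net=https://bc01n01.hswaw.net:2380"
      "bc01n02.hswaw.net=https://bc01n02.hswaw.net:2380"
      "dcr01s22.hswaw.net=https://dcr01s22.hswaw.net:2380"
      "dcr01s24.hswaw.net=https://dcr01s24.hswaw.net:2380"
    ]
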
diff --git a/cluster/machines/modules/kube-dataplane.nix b/cluster/machines/modules/kube-dataplane.nix
new file mode 100644
index 00000000..f38ad84d
--- /dev/null
+++ b/cluster/machines/modules/kube-dataplane.nix
@@ -0,0 +1,96 @@
+{ config, pkgs, lib, machines, ... }:
+
+with lib;
+
+let
+  # Pin for kubelet and proxy.
+  k8spkgs = import (fetchGit {
+    # Now at 1.16.5
+    name = "nixos-unstable-2020-01-22";
+    url = https://github.com/nixos/nixpkgs-channels/;
+    rev = "a96ed5d70427bdc2fbb9e805784e1b9621157a98";
+  }) {};
+
+  cfg = config.hscloud.kube.data;
+
+  # All control plane nodes.
+  controlNodes = let
+    list = mapAttrsToList (_: v: v) machines;
+    filtered = filter (m: (m.config ? hscloud.kube.control) && (m.config.hscloud.kube.control.enable)) list;
+    sorted = sort (a: b: a.config.hscloud.base.fqdn < b.config.hscloud.base.fqdn) filtered;
+  in sorted;
+
+  fqdn = config.hscloud.base.fqdn;
+
+  pki = config.hscloud.kube.pki;
+
+in {
+  options.hscloud.kube.data = {
+    enable = mkEnableOption "kubernetes data plane";
+    podNet = mkOption {
+      type = types.str;
+      description = "Subnet in which this node will run pods. Must not overlap with the podNets of other nodes.";
+    };
+  };
+
+  # Disable kubelet service and bring in our own override.
+  # Also nuke flannel from the orbit.
+  disabledModules = [
+    "services/cluster/kubernetes/kubelet.nix"
+    "services/cluster/kubernetes/flannel.nix"
+  ];
+
+  imports = [
+    ./kubelet.nix
+    ./kube-common.nix
+  ];
+
+
+  config = mkIf cfg.enable {
+    # If we're not running the control plane, render a hostsfile that points at
+    # all other control plane nodes. Otherwise, the control plane module will
+    # make this hostsfile contain the node itself.
+    networking.extraHosts = mkIf (!config.hscloud.kube.control.enable) (concatStringsSep "\n" (map
+      (n: ''
+        ${n.config.hscloud.base.ipAddr} ${n.config.hscloud.base.fqdn}
+      '')
+      controlNodes));
+
+    # this seems to depend on flannel
+    # TODO(q3k): file issue
+    systemd.services.kubelet-online = {
+      script = pkgs.lib.mkForce "sleep 1";
+    };
+
+    services.kubernetes = {
+      # The kubelet wants to mkfs.ext4 when mounting pvcs.
+      path = [ pkgs.e2fsprogs ];
+
+      proxy = {
+        enable = true;
+        kubeconfig = pki.kube.proxy.config;
+        extraOpts = ''
+          --hostname-override=${fqdn}\
+          --proxy-mode=iptables
+        '';
+      };
+
+      kubelet = {
+        enable = true;
+        unschedulable = false;
+        hostname = fqdn;
+        tlsCertFile = pki.kube.kubelet.cert;
+        tlsKeyFile = pki.kube.kubelet.key;
+        clientCaFile = pki.kube.kubelet.ca;
+        nodeIp = config.hscloud.base.ipAddr;
+        networkPlugin = "cni";
+        clusterDns = "10.10.12.254";
+        kubeconfig = pki.kube.kubelet.config;
+        extraOpts = ''
+          --read-only-port=0
+        '';
+        package = config.hscloud.kube.packageKubelet;
+      };
+    };
+  };
+}
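
Note on the extraHosts branch above: every machine in this change runs both planes, so the mkIf is inert for all of them; on a hypothetical data-only worker it would render one hosts entry per control plane node, along the lines of:

    # Rendered /etc/hosts fragment (hypothetical worker node):
    #   185.236.240.35 bc01n01.hswaw.net
    #   185.236.240.36 bc01n02.hswaw.net
    #   185.236.240.39 dcr01s22.hswaw.net
    #   185.236.240.40 dcr01s24.hswaw.net
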
diff --git a/cluster/nix/modules/kubelet.nix b/cluster/machines/modules/kubelet.nix
similarity index 100%
rename from cluster/nix/modules/kubelet.nix
rename to cluster/machines/modules/kubelet.nix
diff --git a/cluster/nix/defs-cluster-k0.nix b/cluster/nix/defs-cluster-k0.nix
deleted file mode 100644
index cd0fcacf..00000000
--- a/cluster/nix/defs-cluster-k0.nix
+++ /dev/null
@@ -1,130 +0,0 @@
-machineName:
-
-let
-  machines = (import ./defs-machines.nix);
-in rec {
-  domain = ".hswaw.net";
-  k8sapi = "k0.hswaw.net";
-  acmeEmail = "q3k@hackerspace.pl";
-
-  fqdn = machineName + domain;
-  machine = (builtins.head (builtins.filter (n: n.fqdn == fqdn) machines));
-  otherMachines = (builtins.filter (n: n.fqdn != fqdn) machines);
-  machinesByName = builtins.listToAttrs (map (m: { name = m.name; value = m; }) machines);
-  inherit machines;
-
-  # Ceph cluster to run systemd modules for.
-  cephCluster = {
-    fsid = "74592dc2-31b7-4dbe-88cf-40459dfeb354";
-    name = "k0";
-
-    # Map from node name to mon configuration (currently always empty).
-    #
-    # Each mon also runs a mgr daemon (which is a leader-elected kitchen
-    # sink^W^Whousekeeping service hanging off of a mon cluster).
-    #
-    # Consult the Ceph documentation
-    # (https://docs.ceph.com/en/pacific/rados/operations/add-or-rm-mons/) on
-    # how to actually carry out mon-related maintenance operations.
-    mons = {
-      bc01n02 = {};
-    };
-
-    # Map from node name to list of disks on node.
-    # Each disk is:
-    #  id:   OSD numerical ID, eg. 0 for osd.0. You get this after running
-    #        ceph-lvm volume create.
-    #  path: Filesystem path for disk backing drive. This should be something
-    #        in /dev/disk/by-id for safety. This is only used to gate OSD
-    #        daemon startup by disk presence.
-    #  uuid: OSD uuid/fsid. You get this after running ceph-lvm volume create.
-    #
-    # Quick guide how to set up a new OSD (but please refer to the Ceph manual):
-    #  0. Copy /var/lib/ceph/bootstrap-osd/k0.keyring from another OSD node to
-    #     the new OSD node, if this is a new node. Remember to chown ceph:ceph
-    #     chmod 0600!
-    #  1. nix-shell -p ceph lvm2 cryptsetup (if on a node that's not yet an OSD)
-    #  2. ceph-volume --cluster k0 lvm create --bluestore --data /dev/sdX --no-systemd --dmcrypt
-    #  3. The above will mount a tmpfs on /var/lib/ceph/osd/k0-X. X is the new
-    #     osd id. A file named fsid inside this directory is the new OSD fsid/uuid.
-    #  4. Configure osds below with the above information, redeploy node from nix.
-    osds = {
-      dcr01s22 = [
-        { id = 0; path = "/dev/disk/by-id/scsi-35000c500850293e3"; uuid = "314034c5-474c-4d0d-ba41-36a881c52560";}
-        { id = 1; path = "/dev/disk/by-id/scsi-35000c500850312cb"; uuid = "a7f1baa0-0fc3-4ab1-9895-67abdc29de03";}
-        { id = 2; path = "/dev/disk/by-id/scsi-35000c5008508e3ef"; uuid = "11ac8316-6a87-48a7-a0c7-74c3cef6c2fa";}
-        { id = 3; path = "/dev/disk/by-id/scsi-35000c5008508e23f"; uuid = "c6b838d1-b08c-4788-936c-293041ed2d4d";}
-      ];
-      dcr01s24 = [
-        { id = 4; path = "/dev/disk/by-id/scsi-35000c5008509199b"; uuid = "a2b4663d-bd8f-49b3-b0b0-195c56ba252f";}
-        { id = 5; path = "/dev/disk/by-id/scsi-35000c50085046abf"; uuid = "a2242989-ccce-4367-8813-519b64b5afdb";}
-        { id = 6; path = "/dev/disk/by-id/scsi-35000c5008502929b"; uuid = "7deac89c-22dd-4c2b-b3cc-43ff7f990fd6";}
-        { id = 7; path = "/dev/disk/by-id/scsi-35000c5008502a323"; uuid = "e305ebb3-9cac-44d2-9f1d-bbb72c8ab51f";}
-      ];
-    };
-  };
-
-  pki = rec {
-    make = (radix: name: rec {
-      ca = ./../certs + "/ca-${radix}.crt";
-      cert = ./../certs + "/${radix}-${name}.cert";
-      key = ./../secrets/plain + "/${radix}-${name}.key";
-
-      json = (builtins.toJSON {
-        ca = (builtins.toString ca);
-        cert = (builtins.toString cert);
-        key = (builtins.toString key);
-      });
-    });
-
-    etcdPeer = (make "etcdpeer" fqdn);
-
-    etcd = {
-      server = (make "etcd" fqdn);
-      kube = (make "etcd" "kube");
-    };
-
-    makeKube = (name: (make "kube" name) // {
-      config = {
-        server = "https://${k8sapi}:${toString ports.k8sAPIServerSecure}";
-        certFile = (make "kube" name).cert;
-        keyFile = (make "kube" name).key;
-      };
-    });
-
-    kube = rec {
-      ca = apiserver.ca;
-
-      # Used to identify apiserver.
-      apiserver = (makeKube "apiserver");
-
-      # Used to identify controller-manager.
-      controllermanager = (makeKube "controllermanager");
-
-      # Used to identify scheduler.
-      scheduler = (makeKube "scheduler");
-
-      # Used to identify kube-proxy.
-      proxy = (makeKube "proxy");
-
-      # Used to identify kubelet.
-      kubelet = (makeKube "kubelet-${fqdn}");
-
-      # Used to encrypt service accounts.
-      serviceaccounts = (makeKube "serviceaccounts");
-    };
-
-    kubeFront = {
-      apiserver = (make "kubefront" "apiserver");
-    };
-  };
-
-  ports = {
-    k8sAPIServerPlain = 4000;
-    k8sAPIServerSecure = 4001;
-    k8sControllerManagerPlain = 0; # would be 4002; do not serve plain http
-    k8sControllerManagerSecure = 4003;
-    k8sSchedulerPlain = 0; # would be 4004; do not serve plain http
-    k8sSchedulerSecure = 4005;
-  };
-}
diff --git a/cluster/nix/defs-machines.nix b/cluster/nix/defs-machines.nix
deleted file mode 100644
index da9150c1..00000000
--- a/cluster/nix/defs-machines.nix
+++ /dev/null
@@ -1,58 +0,0 @@
-[
-  rec {
-    name = "bc01n01";
-    threads = 16;
-    fqdn = "${name}.hswaw.net";
-    ipAddr = "185.236.240.35";
-    ipAddrBits = 28;
-    gw = "185.236.240.33";
-    podNet = "10.10.16.0/24";
-    diskBoot = "/dev/disk/by-id/scsi-360024e8078a9060023b1043107388af5";
-    fsRoot = "/dev/disk/by-uuid/518ecac1-00ea-4ef0-9418-9eca6ce6d918";
-    mgmtIf = "eno1";
-    stateVersion = "18.09";
-  }
-  rec {
-    name = "bc01n02";
-    threads = 16;
-    fqdn = "${name}.hswaw.net";
-    ipAddr = "185.236.240.36";
-    ipAddrBits = 28;
-    gw = "185.236.240.33";
-    podNet = "10.10.17.0/24";
-    diskBoot = "/dev/disk/by-id/scsi-360024e8078b0250023b10f8706d3c99e";
-    fsRoot = "/dev/disk/by-uuid/2d45c87b-029b-463e-a7cb-afd5a3089327";
-    mgmtIf = "eno1";
-    stateVersion = "18.09";
-  }
-  # Tombstone - bc01n03 suffered from hardware failure on 2021/01/10.
-  # rec {
-  #   name = "bc01n03";
-  # }
-  rec {
-    name = "dcr01s22";
-    threads = 48;
-    fqdn = "${name}.hswaw.net";
-    ipAddr = "185.236.240.39";
-    ipAddrBits = 28;
-    gw = "185.236.240.33";
-    podNet = "10.10.19.0/24";
-    diskBoot = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNX1M604518E";
-    fsRoot = "/dev/disk/by-uuid/b4149083-49fe-4951-a143-aff4cedaf33a";
-    mgmtIf = "enp130s0f0";
-    stateVersion = "19.09";
-  }
-  rec {
-    name = "dcr01s24";
-    threads = 48;
-    fqdn = "${name}.hswaw.net";
-    ipAddr = "185.236.240.40";
-    ipAddrBits = 28;
-    gw = "185.236.240.33";
-    podNet = "10.10.20.0/24";
-    diskBoot = "/dev/disk/by-id/ata-Samsung_SSD_860_EVO_250GB_S3YJNF0M717009H";
-    fsRoot = "/dev/disk/by-uuid/fc5c6456-5bbd-4b9e-a93e-7f9073ffe09a";
-    mgmtIf = "enp130s0f0";
-    stateVersion = "19.09";
-  }
-]
diff --git a/cluster/nix/modules/base.nix b/cluster/nix/modules/base.nix
deleted file mode 100644
index 29f2072a..00000000
--- a/cluster/nix/modules/base.nix
+++ /dev/null
@@ -1,77 +0,0 @@
-{ config, pkgs, lib, ... }:
-
-with (( import ../defs-cluster-k0.nix ) config.networking.hostName);
-
-rec {
-  system.stateVersion = machine.stateVersion;
-  nix.maxJobs = machine.threads;
-
-  boot.loader.grub.enable = true;
-  boot.loader.grub.version = 2;
-  boot.loader.grub.device = machine.diskBoot;
-
-  fileSystems."/" =
-    { device = machine.fsRoot;
-      fsType = "ext4";
-    };
-  swapDevices = [ ];
-
-  boot.kernelPackages = pkgs.linuxPackages_latest;
-  boot.kernelParams = [ "boot.shell_on_fail" ];
-  boot.kernel.sysctl."net.ipv4.conf.all.rp_filter" = "0";
-  boot.kernel.sysctl."net.ipv4.conf.default.rp_filter" = "0";
-  boot.initrd.availableKernelModules = [ "uhci_hcd" "ehci_pci" "megaraid_sas" "usb_storage" "usbhid" "sd_mod" "sr_mod" ];
-  boot.kernelModules = [ "kvm-intel" ];
-  boot.extraModulePackages = [];
-  hardware.enableRedistributableFirmware = true;
-
-  time.timeZone = "Europe/Warsaw";
-
-  environment.systemPackages = with pkgs; [
-    wget vim htop tcpdump
-    rxvt_unicode.terminfo
-  ];
-  programs.mtr.enable = true;
-
-  networking.useDHCP = false;
-  networking.interfaces."${machine.mgmtIf}" = {
-    ipv4.addresses = [
-      {
-        address = machine.ipAddr;
-        prefixLength = machine.ipAddrBits;
-      }
-    ];
-  };
-  networking.defaultGateway = machine.gw;
-  networking.nameservers = ["185.236.240.1"];
-
-  # Instead of using nixpkgs from the root/nixos channel, use pkgs pin from this file.
-  nix.nixPath = [ "nixpkgs=${pkgs.path}" "nixos-config=/etc/nixos/configuration.nix" ];
-
-  # Otherwise fetchGit nixpkgs pin fails.
-  systemd.services.nixos-upgrade.path = [ pkgs.git ];
-
-  # Use Chrony instead of systemd-timesyncd
-  services.chrony.enable = true;
-
-  # Symlink lvm into /sbin/lvm on activation. This is needed by Rook OSD
-  # instances running on Kubernetes.
-  # See: https://github.com/rook/rook/commit/f3c4975e353e3ce3599c958ec6d2cae8ee8f6f61
-  system.activationScripts.sbinlvm =
-    ''
-      mkdir -m 0755 -p /sbin
-      ln -sfn ${pkgs.lvm2.bin}/bin/lvm /sbin/lvm
-    '';
-
-  # Enable the OpenSSH daemon.
-  services.openssh.enable = true;
-  users.users.root.openssh.authorizedKeys.keys = [
-    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD4VJXAXEHEXZk2dxNwehneuJcEGkfXG/U7z4fO79vDVIENdedtXQUyLyhZJc5RTEfHhQj66FwIqzl7mzBHd9x9PuDp6QAYXrkVNMj48s6JXqZqBvF6H/weRqFMf4a2TZv+hG8D0kpvmLheCwWAVRls7Jofnp/My+yDd57GMdsbG/yFEf6WPMiOnA7hxdSJSVihCsCSw2p8PD4GhBe8CVt7xIuinhutjm9zYBjV78NT8acjDUfJh0B1ODTjs7nuW1CC4jybSe2j/OU3Yczj4AxRxBNWuFxUq+jBo9BfpbKLh+Tt7re+zBkaicM77KM/oV6943JJxgHNBBOsv9scZE7 q3k@amnesia"
-    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIG599UildOrAq+LIOQjKqtGMwjgjIxozI1jtQQRKHtCP q3k@mimeomia"
-    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQb3YQoiYFZLKwvHYKbu1bMqzNeDCAszQhAe1+QI5SLDOotclyY/vFmOReZOsmyMFl71G2d7d+FbYNusUnNNjTxRYQ021tVc+RkMdLJaORRURmQfEFEKbai6QSFTwErXzuoIzyEPK0lbsQuGgqT9WaVnRzHJ2Q/4+qQbxAS34PuR5NqEkmn4G6LMo3OyJ5mwPkCj9lsqz4BcxRaMWFO3mNcwGDfSW+sqgc3E8N6LKrTpZq3ke7xacpQmcG5DU9VO+2QVPdltl9jWbs3gXjmF92YRNOuKPVfAOZBBsp8JOznfx8s9wDgs7RwPmDpjIAJEyoABqW5hlXfqRbTnfnMvuR informatic@InformaticPC"
-    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDGkMgEVwQM8yeuFUYL2TwlJIq9yUNBmHnwce46zeL2PK2CkMz7sxT/om7sp/K5XDiqeD05Nioe+Dr3drP6B8uI33S5NgxPIfaqQsRS+CBEgk6cqFlcdlKETU/DT+/WsdoO173n7mgGeafPInEuQuGDUID0Fl099kIxtqfAhdeZFMM6/szAZEZsElLJ8K6dp1Ni/jmnXCZhjivZH3AZUlnqrmtDG7FY1bgcOfDXAal45LItughGPtrdiigXe9DK2fW3+9DBZZduh5DMJTNlphAZ+nfSrbyHVKUg6WsgMSprur4KdU47q1QwzqqvEj75JcdP1jOWoZi4F6VJDte9Wb9lhD1jGgjxY9O6Gs4CH35bx15W7CN9hgNa0C8NbPJe/fZYIeMZmJ1m7O2xmnYwP8j+t7RNJWu7Pa3Em4mOEXvhBF07Zfq+Ye/4SluoRgADy5eII2x5fFo5EBhInxK0/X8wF6XZvysalVifoCh7T4Edejoi91oAxFgYAxbboXGlod0eEHIi2hla8SM9+IBHOChmgawKBYp2kzAJyAmHNBF+Pah9G4arVCj/axp/SJZDZbJQoI7UT/fJzEtvlb5RWrHXRq+y6IvjpUq4pzpDWW04+9UMqEEXRmhWOakHfEVM9rN8h3aJBflLUBBnh0Z/hVsKNh8bCRHaKtah8TrD9i+wMw== patryk.jakuszew@gmail.com"
-    "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQC33naG1ptCvUcRWX9cj9wXM1nW1lyQC4SvMJzWlr9aMD96O8hQ2JMkuIUgUJvorAY02QRplQ2BuoVoVkdkzwjMyi1bL3OdgcKo7Z1yByClGTTocqNJYY0lcUb6EJH8+6e6F9ydrQlSxNzL1uCaA7phZr+yPcmAmWbSfioXn98yXNkE0emHxzJv/nypJY56sDCMC2IXDRd8L2goDtPwgPEW7bWfAQdIFMJ75xOidZOTxJ8eqyXLw/kxY5UlyX66jdoYz1sE5XUHuoQl1AOG9UdlMo0aMhUvP4pX5l7r7EnA9OttKMFB3oWqkVK/R6ynZ52YNOU5BZ9V+Ppaj34W0xNu+p0mbHcCtXYCTrf/OU0hcZDbDaNTjs6Vtcm2wYw9iAKX7Tex+eOMwUwlrlcyPNRV5BTot7lGNYfauHCSIuWJKN4NhCLR/NtVNh4/94eKkPTwJsY6XqDcS7q49wPAs4DAH7BJgsbHPOqygVHrY0YYEfz3Pj0HTxJHQMCP/hQX4fXEGt0BjgoVJbXPAQtPyeg0JuxiUg+b4CgVVfQ6R060MlM1BZzhmh+FY5MJH6nJppS0aHYCvSg8Z68NUlCPKy0jpcyfuAIWQWwSGG1O010WShQG2ELsvNdg5/4HVdCGNl5mmoom6JOd72FOZyQlHDFfeQUQRn9HOeCq/c51rK99SQ== bartek@IHM"
-    "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAICTR292kx/2CNuWYIsZ6gykQ036aBGrmheIuZa6S1D2x implr@thonk"
-  ];
-
-}
diff --git a/cluster/nix/modules/kubernetes.nix b/cluster/nix/modules/kubernetes.nix
deleted file mode 100644
index df82effc..00000000
--- a/cluster/nix/modules/kubernetes.nix
+++ /dev/null
@@ -1,195 +0,0 @@
-{ config, pkgs, lib, ... }:
-
-with (( import ../defs-cluster-k0.nix ) config.networking.hostName);
-let
-  # Pin for k8s packages. This is so that upagrading the system will not upgrade the k8s control or data planes.
-  k8spkgs = import (fetchGit {
-    # Now at 1.16.5
-    name = "nixos-unstable-2020-01-22";
-    url = https://github.com/nixos/nixpkgs-channels/;
-    rev = "a96ed5d70427bdc2fbb9e805784e1b9621157a98";
-  }) {};
-  # Pin for kubelet
-  k8spkgsKubelet = import (fetchGit {
-    # Now at 1.16.5
-    name = "nixos-unstable-2020-01-22";
-    url = https://github.com/nixos/nixpkgs-channels/;
-    rev = "a96ed5d70427bdc2fbb9e805784e1b9621157a98";
-  }) {};
-
-in rec {
-  # Disable kubelet service and bring in our own override.
-  # Also nuke flannel from the orbit.
-  disabledModules = [
-    "services/cluster/kubernetes/kubelet.nix"
-    "services/cluster/kubernetes/flannel.nix"
-  ];
-
-  imports =
-    [
-      ./kubelet.nix
-    ];
-
-  networking.firewall.enable = false;
-
-  # Point k8s apiserver address at ourselves, as every machine runs an apiserver with this cert name.
-  networking.extraHosts = ''
-    127.0.0.1 ${k8sapi}
-  '';
-
-  services.etcd = rec {
-    enable = true;
-    name = fqdn;
-    listenClientUrls = ["https://0.0.0.0:2379"];
-    advertiseClientUrls = ["https://${fqdn}:2379"];
-    listenPeerUrls = ["https://0.0.0.0:2380"];
-    initialAdvertisePeerUrls = ["https://${fqdn}:2380"];
-    initialCluster = (map (n: "${n.fqdn}=https://${n.fqdn}:2380") machines);
-    initialClusterState = "existing";
-
-    clientCertAuth = true;
-    trustedCaFile = pki.etcd.server.ca;
-    certFile = pki.etcd.server.cert;
-    keyFile = pki.etcd.server.key;
-
-    peerClientCertAuth = true;
-    peerTrustedCaFile = pki.etcdPeer.ca;
-    peerCertFile = pki.etcdPeer.cert;
-    peerKeyFile = pki.etcdPeer.key;
-
-    extraConf = {
-      PEER_CLIENT_CERT_AUTH = "true";
-    };
-  };
-
-  services.kubernetes = {
-    # Pin to specific k8s package.
-    package = k8spkgs.kubernetes;
-    roles = []; # We do not use any nixpkgs predefined roles for k8s. Instead,
-                # we enable k8s components manually.
-
-    caFile = pki.kube.apiserver.ca;
-    clusterCidr = "10.10.16.0/20";
-
-    path = [ pkgs.e2fsprogs ]; # kubelet wants to mkfs.ext4 when mounting pvcs
-
-    addons.dns.enable = false;
-
-    apiserver = rec {
-      enable = true;
-      insecurePort = ports.k8sAPIServerPlain;
-      securePort = ports.k8sAPIServerSecure;
-      advertiseAddress = "${machine.ipAddr}";
-
-      etcd = {
-        # https://github.com/kubernetes/kubernetes/issues/72102
-        servers = (map (n: "https://${n.fqdn}:2379") ( [ machine ] ));
-        caFile = pki.etcd.kube.ca;
-        keyFile = pki.etcd.kube.key;
-        certFile = pki.etcd.kube.cert;
-      };
-
-      tlsCertFile = pki.kube.apiserver.cert;
-      tlsKeyFile = pki.kube.apiserver.key;
-
-      clientCaFile = pki.kube.apiserver.ca;
-
-      kubeletHttps = true;
-      kubeletClientCaFile = pki.kube.apiserver.ca;
-      kubeletClientCertFile = pki.kube.apiserver.cert;
-      kubeletClientKeyFile = pki.kube.apiserver.key;
-
-      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
-
-      allowPrivileged = true;
-      serviceClusterIpRange = "10.10.12.0/24";
-      runtimeConfig = "api/all,authentication.k8s.io/v1beta1";
-      authorizationMode = ["Node" "RBAC"];
-      enableAdmissionPlugins = ["NamespaceLifecycle" "NodeRestriction" "LimitRanger" "ServiceAccount" "DefaultStorageClass" "ResourceQuota" "PodSecurityPolicy"];
-      extraOpts = ''
-        --apiserver-count=5 \
-        --proxy-client-cert-file=${pki.kubeFront.apiserver.cert} \
-        --proxy-client-key-file=${pki.kubeFront.apiserver.key} \
-        --requestheader-allowed-names= \
-        --requestheader-client-ca-file=${pki.kubeFront.apiserver.ca} \
-        --requestheader-extra-headers-prefix=X-Remote-Extra- \
-        --requestheader-group-headers=X-Remote-Group \
-        --requestheader-username-headers=X-Remote-User \
-        -v=5
-      '';
-    };
-
-    controllerManager = let
-      top = config.services.kubernetes;
-      kubeconfig = top.lib.mkKubeConfig "controller-manager" pki.kube.controllermanager.config;
-    in {
-      enable = true;
-      bindAddress = "0.0.0.0";
-      insecurePort = ports.k8sControllerManagerPlain;
-      leaderElect = true;
-      serviceAccountKeyFile = pki.kube.serviceaccounts.key;
-      rootCaFile = pki.kube.ca;
-      extraOpts = ''
-        --service-cluster-ip-range=10.10.12.0/24 \
-        --use-service-account-credentials=true \
-        --secure-port=${toString ports.k8sControllerManagerSecure}\
-        --authentication-kubeconfig=${kubeconfig}\
-        --authorization-kubeconfig=${kubeconfig}\
-      '';
-      kubeconfig = pki.kube.controllermanager.config;
-    };
-
-    scheduler = let
-      top = config.services.kubernetes;
-      kubeconfig = top.lib.mkKubeConfig "scheduler" pki.kube.controllermanager.config;
-    in {
-      enable = true;
-      address = "0.0.0.0";
-      port = ports.k8sSchedulerPlain;
-      leaderElect = true;
-      kubeconfig = pki.kube.scheduler.config;
-      extraOpts = ''
-        --secure-port=${toString ports.k8sSchedulerSecure}\
-        --authentication-kubeconfig=${kubeconfig}\
-        --authorization-kubeconfig=${kubeconfig}\
-      '';
-    };
-
-    proxy = {
-      enable = true;
-      kubeconfig = pki.kube.proxy.config;
-      extraOpts = ''
-        --hostname-override=${fqdn}\
-        --proxy-mode=iptables
-      '';
-    };
-
-    kubelet = {
-      enable = true;
-      unschedulable = false;
-      hostname = fqdn;
-      tlsCertFile = pki.kube.kubelet.cert;
-      tlsKeyFile = pki.kube.kubelet.key;
-      clientCaFile = pki.kube.kubelet.ca;
-      nodeIp = machine.ipAddr;
-      networkPlugin = "cni";
-      clusterDns = "10.10.12.254";
-      kubeconfig = pki.kube.kubelet.config;
-      extraOpts = ''
-        --read-only-port=0
-      '';
-      package = k8spkgsKubelet.kubernetes;
-    };
-
-  };
-
-  # https://github.com/NixOS/nixpkgs/issues/60687
-  systemd.services.kube-control-plane-online = {
-    preStart = pkgs.lib.mkForce "";
-  };
-  # this seems to depend on flannel
-  # TODO(q3k): file issue
-  systemd.services.kubelet-online = {
-    script = pkgs.lib.mkForce "sleep 1";
-  };
-}
diff --git a/ops/machines.nix b/ops/machines.nix
index 7dd12321..208279ec 100644
--- a/ops/machines.nix
+++ b/ops/machines.nix
@@ -60,23 +60,6 @@ let
     sha256 = "0p7df7yzi35kblxr5ks0rxxp9cfh269g88xpj60sdhdjvfnn6cp7";
   }) {};
 
-  # Stopgap measure to import //cluster/nix machine definitions into new
-  # //ops/ infrastructure.
-  #
-  # TODO(q3k): inject defs-cluster-k0.nix / defs-machines.nix content via
-  # nixos options instead of having module definitions loading it themselves,
-  # deduplicate list of machines below with defs-machines.nix somehow.
-  clusterMachineConfig = name: [({ config, pkgs, ...}: {
-    # The hostname is used by //cluster/nix machinery to load the appropriate
-    # config from defs-machines into defs-cluster-k0.
-    networking.hostName = name;
-    imports = [
-      ../cluster/nix/modules/base.nix
-      ../cluster/nix/modules/kubernetes.nix
-      ../cluster/nix/modules/ceph.nix
-    ];
-  })];
-
   # mkMachine builds NixOS modules into a NixOS derivation.
   # It:
   #  1) injects passthru.hscloud.provision which deploys that configuration
@@ -85,7 +68,7 @@ let
   #    of the hscloud readTree object. It will contain whatever nixpkgs
   #    checkout this file has been invoked with, ie. will not be 'mixed in'
   #    with the pkgs argument.
-  mkMachine = pkgs: paths: pkgs.nixos ({ config, pkgs, ... }: {
+  mkMachine = machines: pkgs: paths: pkgs.nixos ({ config, pkgs, ... }: {
     imports = paths;
 
     config = let
@@ -139,20 +122,32 @@ let
 
       # TODO(q3k): this should be named hscloud, but that seems to not work. Debug and rename.
       _module.args.workspace = hscloud;
+      _module.args.machines = machines;
     };
   });
 
-in {
-  "bc01n01.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n01");
-  "bc01n02.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n02");
-  "dcr01s22.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s22");
-  "dcr01s24.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s24");
-  "edge01.waw.bgp.wtf" = mkMachine nixpkgsBgpwtf [
-    ../bgpwtf/machines/edge01.waw.bgp.wtf.nix
-    ../bgpwtf/machines/edge01.waw.bgp.wtf-hardware.nix
+  mkClusterMachine = machines: path: mkMachine machines nixpkgsCluster [
+    ../cluster/machines/modules/base.nix
+    ../cluster/machines/modules/kube-controlplane.nix
+    ../cluster/machines/modules/kube-dataplane.nix
+    ../cluster/machines/modules/ceph.nix
+    path
   ];
-  "customs.hackerspace.pl" = mkMachine pkgs [
-    ../hswaw/machines/customs.hackerspace.pl/configuration.nix
-  ];
-}
+
+  machines = self: {
+    "bc01n01.hswaw.net" = mkClusterMachine self ../cluster/machines/bc01n01.hswaw.net.nix;
+    "bc01n02.hswaw.net" = mkClusterMachine self ../cluster/machines/bc01n02.hswaw.net.nix;
+    "dcr01s22.hswaw.net" = mkClusterMachine self ../cluster/machines/dcr01s22.hswaw.net.nix;
+    "dcr01s24.hswaw.net" = mkClusterMachine self ../cluster/machines/dcr01s24.hswaw.net.nix;
+
+    "edge01.waw.bgp.wtf" = mkMachine self nixpkgsBgpwtf [
+      ../bgpwtf/machines/edge01.waw.bgp.wtf.nix
+      ../bgpwtf/machines/edge01.waw.bgp.wtf-hardware.nix
+    ];
+
+    "customs.hackerspace.pl" = mkMachine self pkgs [
+      ../hswaw/machines/customs.hackerspace.pl/configuration.nix
+    ];
+  };
+
+in pkgs.lib.fix machines
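
Closing note on the new entry point: pkgs.lib.fix is nixpkgs' standard fixed-point combinator,

    # From nixpkgs lib/fixed-points.nix:
    fix = f: let x = f x; in x;

so `machines` is applied to its own final result. Each node's modules receive that result through _module.args.machines, which is what lets ceph.nix and kube-controlplane.nix above enumerate their peers' evaluated configs.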