forked from hswaw/hscloud
ops, cluster: consolidate NixOS provisioning
This moves the diff-and-activate logic from cluster/nix/provision.nix into ops/{provision,machines}.nix, which can be used for both cluster machines and bgpwtf machines. The provisioning scripts now live per-NixOS-config, and anything under ops.machines.$fqdn now has a .passthru.hscloud.provision derivation which is that script. When run, it will attempt to deploy onto the target machine. There's also a top-level tool at `ops.provision` which builds all configurations / machines and can be called with the machine name/fqdn to call the corresponding provisioner script. clustercfg is changed to use the new provisioning logic. Change-Id: I258abce9e8e3db42af35af102f32ab7963046353
This commit is contained in:
parent
eed9afe210
commit
b3c6770f8d
5 changed files with 189 additions and 86 deletions
|
@ -206,10 +206,12 @@ def nodestrap(args, nocerts=False):
|
||||||
ca_admitomatic = ca.CA(ss, certs_root, 'admitomatic', 'admitomatic webhook CA')
|
ca_admitomatic = ca.CA(ss, certs_root, 'admitomatic', 'admitomatic webhook CA')
|
||||||
ca_admitomatic.make_cert('admitomatic-webhook', ou='Admitomatic Webhook', hosts=['admitomatic.admitomatic.svc'])
|
ca_admitomatic.make_cert('admitomatic-webhook', ou='Admitomatic Webhook', hosts=['admitomatic.admitomatic.svc'])
|
||||||
|
|
||||||
subprocess.check_call(["nix", "run",
|
toplevel = subprocess.check_output([
|
||||||
"-f", local_root,
|
"nix-build",
|
||||||
"cluster.nix.provision",
|
local_root,
|
||||||
"-c", "provision-{}".format(fqdn.split('.')[0])])
|
"-A", "ops.machines.\"" + fqdn + "\".config.passthru.hscloud.provision",
|
||||||
|
]).decode().strip()
|
||||||
|
subprocess.check_call([toplevel])
|
||||||
|
|
||||||
|
|
||||||
def usage():
|
def usage():
|
||||||
|
|
|
@ -1,49 +0,0 @@
|
||||||
{ hscloud, pkgs, ... }:
|
|
||||||
|
|
||||||
with builtins;
|
|
||||||
|
|
||||||
let
|
|
||||||
machines = (import ./defs-machines.nix);
|
|
||||||
configurations = builtins.listToAttrs (map (machine: {
|
|
||||||
name = machine.fqdn;
|
|
||||||
value = pkgs.nixos ({ config, pkgs, ... }: {
|
|
||||||
networking.hostName = machine.name;
|
|
||||||
imports = [
|
|
||||||
./modules/base.nix
|
|
||||||
./modules/kubernetes.nix
|
|
||||||
];
|
|
||||||
});
|
|
||||||
}) machines);
|
|
||||||
|
|
||||||
scriptForMachine = machine: let
|
|
||||||
configuration = configurations."${machine.fqdn}";
|
|
||||||
in ''
|
|
||||||
set -e
|
|
||||||
remote=root@${machine.fqdn}
|
|
||||||
echo "Configuration for ${machine.fqdn} is ${configuration.toplevel}"
|
|
||||||
nix copy --no-check-sigs -s --to ssh://$remote ${configuration.toplevel}
|
|
||||||
echo "/etc/systemd/system diff:"
|
|
||||||
ssh $remote diff -ur /var/run/current-system/etc/systemd/system ${configuration.toplevel}/etc/systemd/system || true
|
|
||||||
echo ""
|
|
||||||
echo ""
|
|
||||||
ssh $remote ${configuration.toplevel}/bin/switch-to-configuration dry-activate
|
|
||||||
read -p "Do you want to switch to this configuration? " -n 1 -r
|
|
||||||
echo
|
|
||||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
|
||||||
ssh $remote ${configuration.toplevel}/bin/switch-to-configuration switch
|
|
||||||
fi
|
|
||||||
'';
|
|
||||||
|
|
||||||
provisioners = (map (machine:
|
|
||||||
pkgs.writeScriptBin "provision-${machine.name}" (scriptForMachine machine)
|
|
||||||
) machines);
|
|
||||||
|
|
||||||
provision = pkgs.writeScriptBin "provision" (
|
|
||||||
''
|
|
||||||
echo "Available provisioniers:"
|
|
||||||
'' + (concatStringsSep "\n" (map (machine: "echo ' provision-${machine.name}'") machines)));
|
|
||||||
in
|
|
||||||
pkgs.symlinkJoin {
|
|
||||||
name = "provision";
|
|
||||||
paths = [ provision ] ++ provisioners;
|
|
||||||
}
|
|
23
ops/README.md
Normal file
23
ops/README.md
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
Operations
|
||||||
|
===
|
||||||
|
|
||||||
|
Deploying NixOS machines
|
||||||
|
---
|
||||||
|
|
||||||
|
Machine configurations are in `ops/machines.nix`.
|
||||||
|
|
||||||
|
Wrapper script to show all available machines and provision a single machine:
|
||||||
|
|
||||||
|
$ $(nix-build -A ops.provision)
|
||||||
|
Available machines:
|
||||||
|
- bc01n01.hswaw.net
|
||||||
|
- bc01n02.hswaw.net
|
||||||
|
- dcr01s22.hswaw.net
|
||||||
|
- dcr01s24.hswaw.net
|
||||||
|
- edge01.waw.bgp.wtf
|
||||||
|
|
||||||
|
$ $(nix-build -A ops.provision) edge01.waw.bgp.wtf
|
||||||
|
|
||||||
|
This can be slow, as it evaluates/builds all machines' configs. If you just want to deploy one machine and possibly iterate faster:
|
||||||
|
|
||||||
|
$ $(nix-build -A 'ops.machines."edge01.waw.bgp.wtf".config.passthru.hscloud.provision')
|
119
ops/machines.nix
119
ops/machines.nix
|
@ -3,30 +3,41 @@
|
||||||
# This allows to have a common attrset of machines that can be deployed
|
# This allows to have a common attrset of machines that can be deployed
|
||||||
# in the same way.
|
# in the same way.
|
||||||
#
|
#
|
||||||
# Currently building/deployment is still done in a half-assed way:
|
# For information about building/deploying machines see //ops/README.md.
|
||||||
#
|
|
||||||
# machine=edge01.waw.bgp.wtf
|
|
||||||
# d=$(nix-build -A 'ops.machines."'$machine'"'.toplevel)
|
|
||||||
#
|
|
||||||
# To then deploy derivation $d on $machine:
|
|
||||||
#
|
|
||||||
# nix-copy-closure --to root@$machine $d
|
|
||||||
# ssh root@$machine $d/bin/switch-to-configuration dry-activate
|
|
||||||
# ssh root@$machine $d/bin/switch-to-configuration test
|
|
||||||
# ssh root@$machine nix-env -p /nix/var/nix/profiles/system --set $d
|
|
||||||
# ssh root@$machine $d/bin/switch-to-configuration boot
|
|
||||||
#
|
|
||||||
# TODO(q3k): merge this with //cluster/clustercfg - this should be unified!
|
|
||||||
|
|
||||||
{ hscloud, pkgs, ... }:
|
{ hscloud, pkgs, ... }:
|
||||||
|
|
||||||
let
|
let
|
||||||
|
# nixpkgs for cluster machines (.hswaw.net). Currently pinned to an old
|
||||||
|
# nixpkgs because NixOS modules for kubernetes changed enough that it's not
|
||||||
|
# super easy to use them as is.
|
||||||
|
#
|
||||||
|
# TODO(q3k): fix this: use an old nixpkgs for Kube modules while using
|
||||||
|
# hscloud nixpkgs for everything else.
|
||||||
|
nixpkgsCluster = import (pkgs.fetchFromGitHub {
|
||||||
|
owner = "nixos";
|
||||||
|
repo = "nixpkgs-channels";
|
||||||
|
rev = "44ad80ab1036c5cc83ada4bfa451dac9939f2a10";
|
||||||
|
sha256 = "1b61nzvy0d46cspy07szkc0rggacxiqg9v1py27pkqpj7rvawfsk";
|
||||||
|
}) {};
|
||||||
|
|
||||||
|
# edge01 still lives on an old nixpkgs checkout.
|
||||||
|
#
|
||||||
|
# TODO(b/3): unpin and deploy.
|
||||||
|
nixpkgsBgpwtf = import (pkgs.fetchFromGitHub {
|
||||||
|
owner = "nixos";
|
||||||
|
repo = "nixpkgs-channels";
|
||||||
|
rev = "c59ea8b8a0e7f927e7291c14ea6cd1bd3a16ff38";
|
||||||
|
sha256 = "1ak7jqx94fjhc68xh1lh35kh3w3ndbadprrb762qgvcfb8351x8v";
|
||||||
|
}) {};
|
||||||
|
|
||||||
# Stopgap measure to import //cluster/nix machine definitions into new
|
# Stopgap measure to import //cluster/nix machine definitions into new
|
||||||
# //ops/machines infrastructure.
|
# //ops/ infrastructure.
|
||||||
|
#
|
||||||
# TODO(q3k): inject defs-cluster-k0.nix / defs-machines.nix content via
|
# TODO(q3k): inject defs-cluster-k0.nix / defs-machines.nix content via
|
||||||
# nixos options instead of having module definitions loading it themselves,
|
# nixos options instead of having module definitions loading it themselves,
|
||||||
# deduplicate list of machines below with defs-machines.nix somehow.
|
# deduplicate list of machines below with defs-machines.nix somehow.
|
||||||
mkClusterMachine = name: pkgs.nixos ({ config, pkgs, ... }: {
|
clusterMachineConfig = name: [({ config, pkgs, ...}: {
|
||||||
# The hostname is used by //cluster/nix machinery to load the appropriate
|
# The hostname is used by //cluster/nix machinery to load the appropriate
|
||||||
# config from defs-machines into defs-cluster-k0.
|
# config from defs-machines into defs-cluster-k0.
|
||||||
networking.hostName = name;
|
networking.hostName = name;
|
||||||
|
@ -34,29 +45,71 @@ let
|
||||||
../cluster/nix/modules/base.nix
|
../cluster/nix/modules/base.nix
|
||||||
../cluster/nix/modules/kubernetes.nix
|
../cluster/nix/modules/kubernetes.nix
|
||||||
];
|
];
|
||||||
});
|
})];
|
||||||
|
|
||||||
|
# mkMachine builds NixOS modules into a NixOS derivation, and injects
|
||||||
|
# passthru.hscloud.provision which deploys that configuration over SSH to a
|
||||||
|
# production machine.
|
||||||
mkMachine = pkgs: paths: pkgs.nixos ({ config, pkgs, ... }: {
|
mkMachine = pkgs: paths: pkgs.nixos ({ config, pkgs, ... }: {
|
||||||
imports = paths;
|
imports = paths;
|
||||||
|
|
||||||
|
config = let
|
||||||
|
name = config.networking.hostName;
|
||||||
|
domain = if (config.networking ? domain) && config.networking.domain != null then config.networking.domain else "hswaw.net";
|
||||||
|
fqdn = name + "." + domain;
|
||||||
|
toplevel = config.system.build.toplevel;
|
||||||
|
|
||||||
|
# Text of the per-machine provisioner script. It is run on the operator's
# workstation and:
#   1. copies the built system closure (toplevel) to root@fqdn over SSH,
#   2. if the remote system profile does not already point at toplevel,
#      shows a diff of /etc/systemd/system and a dry-activate, then (after
#      confirmation) runs `switch-to-configuration test`,
#   3. after a second confirmation, sets the remote system profile to
#      toplevel and runs `switch-to-configuration boot`.
# Answering anything other than y/Y at either prompt exits with status 1.
#
# The interpreter must be bash rather than /bin/sh: the script relies on
# [[ ... =~ ... ]], `read -p ... -n 1 -r` and `echo -ne`, none of which are
# available in a plain POSIX shell such as dash.
#
# NOTE(review): the "already running" check compares the remote *system
# profile* link, not /run/current-system — confirm this is the intended
# notion of "running".
runProvision = ''
  #!/usr/bin/env bash
  set -eu

  remote=root@${fqdn}
  echo "Configuration for ${fqdn} is ${toplevel}"
  nix copy -s --to "ssh://$remote" ${toplevel}

  running="$(ssh "$remote" readlink -f /nix/var/nix/profiles/system)"
  if [ "$running" == "${toplevel}" ]; then
    echo "${fqdn} already running ${toplevel}."
  else
    echo "/etc/systemd/system diff:"
    # diff exits non-zero when the trees differ; that must not abort the run.
    ssh "$remote" diff -ur /var/run/current-system/etc/systemd/system ${toplevel}/etc/systemd/system || true
    echo ""
    echo ""
    echo "dry-activate diff:"
    ssh "$remote" ${toplevel}/bin/switch-to-configuration dry-activate
    read -p "Do you want to switch to this configuration? " -n 1 -r
    echo
    if ! [[ $REPLY =~ ^[Yy]$ ]]; then
      exit 1
    fi

    echo -ne "\n\nswitch-to-configuration test...\n"
    ssh "$remote" ${toplevel}/bin/switch-to-configuration test
  fi

  echo -ne "\n\n"
  read -p "Do you want to set this configuration as boot? " -n 1 -r
  echo
  if ! [[ $REPLY =~ ^[Yy]$ ]]; then
    exit 1
  fi

  echo -ne "\n\nsetting system profile...\n"
  ssh "$remote" nix-env -p /nix/var/nix/profiles/system --set ${toplevel}

  echo -ne "\n\nswitch-to-configuration boot...\n"
  ssh "$remote" ${toplevel}/bin/switch-to-configuration boot
'';
|
||||||
|
in {
|
||||||
|
passthru.hscloud.provision = pkgs.writeScript "provision-${fqdn}" runProvision;
|
||||||
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
in {
|
in {
|
||||||
"bc01n01.hswaw.net" = mkClusterMachine "bc01n01";
|
"bc01n01.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n01");
|
||||||
"bc01n02.hswaw.net" = mkClusterMachine "bc01n02";
|
"bc01n02.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "bc01n02");
|
||||||
"bc01n03.hswaw.net" = mkClusterMachine "bc01n03";
|
"dcr01s22.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s22");
|
||||||
"dcr01s22.hswaw.net" = mkClusterMachine "dcr01s22";
|
"dcr01s24.hswaw.net" = mkMachine nixpkgsCluster (clusterMachineConfig "dcr01s24");
|
||||||
"dcr01s24.hswaw.net" = mkClusterMachine "dcr01s24";
|
|
||||||
|
|
||||||
# edge01 still lives on an old nixpkgs checkout.
|
"edge01.waw.bgp.wtf" = mkMachine nixpkgsBgpwtf [
|
||||||
# TODO(b/3): unpin and deploy.
|
|
||||||
"edge01.waw.bgp.wtf" = mkMachine (
|
|
||||||
import (pkgs.fetchFromGitHub {
|
|
||||||
owner = "nixos";
|
|
||||||
repo = "nixpkgs-channels";
|
|
||||||
rev = "c59ea8b8a0e7f927e7291c14ea6cd1bd3a16ff38";
|
|
||||||
sha256 = "1ak7jqx94fjhc68xh1lh35kh3w3ndbadprrb762qgvcfb8351x8v";
|
|
||||||
}) {}
|
|
||||||
) [
|
|
||||||
../bgpwtf/machines/edge01.waw.bgp.wtf.nix
|
../bgpwtf/machines/edge01.waw.bgp.wtf.nix
|
||||||
../bgpwtf/machines/edge01.waw.bgp.wtf-hardware.nix
|
../bgpwtf/machines/edge01.waw.bgp.wtf-hardware.nix
|
||||||
];
|
];
|
||||||
|
|
74
ops/provision.nix
Normal file
74
ops/provision.nix
Normal file
|
@ -0,0 +1,74 @@
|
||||||
|
# Top-level wrapper script for calling per-machine provisioners.
|
||||||
|
#
|
||||||
|
# Given ops.machines."edge01.waw.bgp.wtf".config.passthru.hscloud.provision,
|
||||||
|
# this script allows running it with:
|
||||||
|
# $ $(nix-build -A ops.provision) edge01.waw.bgp.wtf
|
||||||
|
# Or, to first list all available machines:
|
||||||
|
# $ $(nix-build -A ops.provision)
|
||||||
|
#
|
||||||
|
# The main logic of the provisioner script is in machines.nix.
|
||||||
|
|
||||||
|
{ hscloud, pkgs, lib, ... }:
|
||||||
|
|
||||||
|
with lib; with builtins;
|
||||||
|
|
||||||
|
let
|
||||||
|
|
||||||
|
# All machines from ops.machines, keyed by FQDN.
|
||||||
|
machines = filterAttrs (n: _: n != "__readTree") hscloud.ops.machines;
|
||||||
|
# Machines' provisioner scripts, keyed by machine FQDN.
|
||||||
|
machineProvisioners = mapAttrs (_: v: v.config.passthru.hscloud.provision) machines;
|
||||||
|
# List of machine FQDNs.
|
||||||
|
machineNames = attrNames machines;
|
||||||
|
|
||||||
|
# User-friendly list of machines by FQDN.
|
||||||
|
machineList = concatStringsSep "\n"
|
||||||
|
(map
|
||||||
|
(name: " - ${name}")
|
||||||
|
machineNames);
|
||||||
|
|
||||||
|
# Derivation containing bin/provision-FQDN symlinks to machines' provisioners.
|
||||||
|
forest = pkgs.linkFarm "provision-forest"
|
||||||
|
(mapAttrsToList
|
||||||
|
(fqdn: p: { name = "bin/provision-${fqdn}"; path = p; })
|
||||||
|
machineProvisioners);
|
||||||
|
in
|
||||||
|
|
||||||
|
# Wrapper script: resolves its first argument to a per-machine provisioner in
# the link farm (`forest`) and exec()s it.
#
#   $1  machine FQDN, or a bare name to which ".hswaw.net" is appended when
#       no provisioner exists under the name as given.
#
# With no argument, or when no matching provisioner exists, usage and the
# machine list are printed to stderr and the script exits with status 1.
#
# The script is kept strictly POSIX since it runs under /bin/sh: the coloured
# banner is emitted with printf and \033 escapes, because `echo -ne "\e[...]"`
# is a bash extension (dash would print a literal "-ne").
pkgs.writeScript "provision" ''
  #!/bin/sh
  name="$1"

  usage() {
    echo >&2 "Usage: $0 machine|machine.hswaw.net"
    echo >&2 "Available machines:"
    echo >&2 "${machineList}"
  }

  if [ -z "$name" ]; then
    usage
    exit 1
  fi

  provisioner="${forest}/bin/provision-$name"
  if [ ! -e "$provisioner" ]; then
    # Not found under the name as given: retry as a short cluster hostname.
    name="$name.hswaw.net"
    provisioner="${forest}/bin/provision-$name"
  fi
  if [ ! -e "$provisioner" ]; then
    usage
    exit 1
  fi
  # :^)
  printf '\033[34mh \033[31ms \033[33mc l \033[34mo \033[32mu \033[31md \033[0m'
  echo ""
  echo "Starting provisioner for $name..."
  echo ""
  echo "Too slow to evaluate? Equivalent faster command line that rebuilds just one node:"
  echo " \$(nix-build -A 'ops.machines.\"$name\".config.passthru.hscloud.provision')"
  echo ""
  echo "Or, if you want to deploy the same configuration on different machines, just run"
  echo "this script again without re-evaluating nix:"
  echo " $0 $name"
  echo ""
  exec "$provisioner"
''
|
Loading…
Reference in a new issue