From e53e39a8be7ef6933a4530d963dea768dbda813b Mon Sep 17 00:00:00 2001 From: Sergiusz Bazanski Date: Thu, 20 Jun 2019 23:36:35 +0200 Subject: [PATCH] cluster/kube/lib/cockroachdb: use manual node pinning We move away from the StatefulSet based deployment to manually starting a deployment per intended node. This allows us to pin individual instances of Cockroach to particular nodes, so that they stay co-located with their data. --- cluster/kube/lib/cockroachdb.libsonnet | 298 +++++++++++++------------ 1 file changed, 150 insertions(+), 148 deletions(-) diff --git a/cluster/kube/lib/cockroachdb.libsonnet b/cluster/kube/lib/cockroachdb.libsonnet index def9bcba..4ce2af7d 100644 --- a/cluster/kube/lib/cockroachdb.libsonnet +++ b/cluster/kube/lib/cockroachdb.libsonnet @@ -1,9 +1,16 @@ -# Deploy a 3-node CockroachDB cluster in secure mode. +# Deploy a CockroachDB cluster in secure mode. +# This creates an N-node cluster based on a given static topology. # Can be used either in own namespace or in an existing one: # crdb: cockroachdb.Cluster("q3kdb") { # cfg+: { # namespace: "q3k", // if not given, will create 'q3kdb' namespace +# topology: [ +# { name: "a", node: "bc01n01.hswaw.net", ip: "185.236.240.35" }, +# { name: "b", node: "bc01n02.hswaw.net", ip: "185.236.240.36" }, +# { name: "c", node: "bc01n03.hswaw.net", ip: "185.236.240.37" }, +# ], +# hostPath: "/var/db/cockroach-q3k", # }, #}, # @@ -14,7 +21,7 @@ # Then, you can create some users and databases for applications: # defaultdb> CREATE DATABASE wykop; # defaultdb> CREATE USER bialkov PASSWORD hackme; -# defaultdb> GRANT ALL ON DATABASE wykop to bialkov; +# defaultdb> GRANT ALL ON DATABASE wykop TO bialkov; # # You are then ready to access the database via the public service from your application. # @@ -36,6 +43,13 @@ local cm = import "cert-manager.libsonnet"; cfg:: { image: "cockroachdb/cockroach:v19.1.0", + + # Must be unique per cluster. 
+ portServe: 26257, + portHttp: 8080, + hostPath: error "hostPath must be defined", + topology: error "topology must be defined", + namespace: null, ownNamespace: cluster.cfg.namespace == null, }, @@ -57,8 +71,6 @@ local cm = import "cert-manager.libsonnet"; name(suffix):: if cluster.cfg.ownNamespace then suffix else name + "-" + suffix, - hosts:: ["%s-%d.%s.cluster.local" % [cluster.statefulSet.metadata.name, n, cluster.internalService.host] for n in std.range(0, cluster.statefulSet.spec.replicas)], - pki: { selfSignedIssuer: cm.Issuer(cluster.name("selfsigned")) { metadata+: cluster.metadata, @@ -99,14 +111,14 @@ local cm = import "cert-manager.libsonnet"; }, commonName: "node", dnsNames: [ - "localhost", - "127.0.0.1", cluster.publicService.metadata.name, std.join(".", [cluster.publicService.metadata.name, cluster.metadata.namespace ]), + cluster.publicService.host, std.join(".", [cluster.publicService.host, "cluster.local" ]), - std.join(".", [ "*", cluster.internalService.metadata.name ]), - std.join(".", [ "*", cluster.internalService.metadata.name, cluster.metadata.namespace ]), - std.join(".", [ "*", cluster.internalService.host, "cluster.local" ]), + std.join(".", [cluster.publicService.metadata.name, cluster.metadata.namespace ]), + ] + [ + "%s.cluster.local" % s.service.host + for s in cluster.servers ], }, }, @@ -147,35 +159,15 @@ local cm = import "cert-manager.libsonnet"; publicService: kube.Service(cluster.name("public")) { metadata+: cluster.metadata, - target_pod:: cluster.statefulSet.spec.template, + target_pod:: cluster.servers[0].deploy.spec.template, spec+: { ports: [ - { name: "grpc", port: 26257, targetPort: 26257 }, - { name: "http", port: 8080, targetPort: 8080 }, + { name: "grpc", port: cluster.cfg.portServe, targetPort: cluster.cfg.portServe }, + { name: "http", port: cluster.cfg.portHttp, targetPort: cluster.cfg.portHttp }, ], }, }, - internalService: kube.Service(cluster.name("internal")) { - metadata+: cluster.metadata + { - 
annotations+: { - "service.alpha.kubernetes.io/tolerate-unready-endpoints": "true", - "prometheus.io/scrape": "true", - "prometheus.io/path": "_status/vars", - "prometheus.io/port": "8080", - }, - }, - target_pod:: cluster.statefulSet.spec.template, - spec+: { - ports: [ - { name: "grpc", port: 26257, targetPort: 26257 }, - { name: "http", port: 8080, targetPort: 8080 }, - ], - publishNotReadyAddresses: true, - clusterIP: "None", - }, - }, - podDisruptionBudget: kube.PodDisruptionBudget(cluster.name("pod")) { metadata+: cluster.metadata, spec: { @@ -188,128 +180,137 @@ local cm = import "cert-manager.libsonnet"; }, }, - statefulSet: kube.StatefulSet(cluster.name("cockroachdb")) { - metadata+: cluster.metadata { - labels+: { - "app.kubernetes.io/component": "server", - }, - }, - spec+: { - serviceName: cluster.internalService.metadata.name, - replicas: 3, - template: { - metadata: cluster.statefulSet.metadata, + servers: [ + { + local server = self, + service: kube.Service(cluster.name("server-" + el.name)) { + metadata+: cluster.metadata + { + annotations+: { + "service.alpha.kubernetes.io/tolerate-unready-endpoints": "true", + "prometheus.io/scrape": "true", + "prometheus.io/path": "_status/vars", + "prometheus.io/port": std.toString(cluster.cfg.portHttp), + }, + }, + target_pod:: server.deploy.spec.template, spec+: { - dnsPolicy: "ClusterFirst", - serviceAccountName: cluster.serviceAccount.metadata.name, - affinity: { - podAntiAffinity: { - preferredDuringSchedulingIgnoredDuringExecution: [ - { - weight: 100, - podAffinityTerm: { - labelSelector: { - matchExpressions: [ - { - key: "app.kubernetes.io/component", - operator: "In", - values: [ "cockroachdb" ], - }, - ], + ports: [ + { name: "grpc", port: cluster.cfg.portServe, targetPort: cluster.cfg.portServe }, + { name: "http", port: cluster.cfg.portHttp, targetPort: cluster.cfg.portHttp }, + ], + publishNotReadyAddresses: true, + clusterIP: "None", + }, + }, + deploy: kube.Deployment(cluster.name("server-" + 
el.name)) { + metadata+: cluster.metadata { + labels+: { + "app.kubernetes.io/component": "server", + "kubernetes.hackerspace.pl/cockroachdb-server": el.name, + }, + }, + spec+: { + template+: { + metadata: server.deploy.metadata, + spec+: { + dnsPolicy: "ClusterFirst", + serviceAccountName: cluster.serviceAccount.metadata.name, + nodeSelector: { + "kubernetes.io/hostname": el.node, + }, + containers: [ + kube.Container("cockroachdb") { + image: cluster.cfg.image, + imagePullPolicy: "IfNotPresent", + resources: { + requests: { + cpu: "2", + memory: "6Gi", }, - topologyKey: "kubernetes.io/hostname", + limits: { + memory: "6Gi", + }, + }, + ports_: { + "grpc": { containerPort: cluster.cfg.portServe }, + "http": { containerPort: cluster.cfg.portHttp }, + }, + livenessProbe: { + httpGet: { + path: "/health", + port: "http", + }, + initialDelaySeconds: 30, + periodSeconds: 5, + }, + readinessProbe: { + httpGet: { + path: "/health?ready=1", + port: "http", + }, + initialDelaySeconds: 10, + periodSeconds: 5, + failureThreshold: 2, + }, + volumeMounts: [ + { + name: "datadir", + mountPath: "/cockroach/cockroach-data", + }, + { + name: "certs", + mountPath: "/cockroach/cockroach-certs/node.crt", + subPath: "tls.crt", + }, + { + name: "certs", + mountPath: "/cockroach/cockroach-certs/node.key", + subPath: "tls.key", + }, + { + name: "certs", + mountPath: "/cockroach/cockroach-certs/ca.crt", + subPath: "ca.crt", + }, + ], + env_: { + "COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs", + }, + command: [ + "/cockroach/cockroach", "start", + "--logtostderr", + "--certs-dir", "/cockroach/cockroach-certs", + "--advertise-host", "%s.cluster.local" % server.service.host, + "--cache", "25%", "--max-sql-memory", "25%", + "--join", std.join(",", ["%s.cluster.local:%d" % [s.service.host, cluster.cfg.portServe] for s in cluster.servers]), + "--listen-addr=0.0.0.0:%d" % cluster.cfg.portServe, + "--http-addr=0.0.0.0:%d" % cluster.cfg.portHttp, + ], + }, + ], + 
terminationGracePeriodSeconds: 60, + volumes: [ + { + name: "datadir", + hostPath: { + path: cluster.cfg.hostPath, + }, + }, + { + name: "certs", + secret: { + secretName: cluster.pki.nodeCertificate.spec.secretName, + defaultMode: kube.parseOctal("400"), }, }, ], }, }, - containers: [ - kube.Container("cockroachdb") { - image: cluster.cfg.image, - imagePullPolicy: "IfNotPresent", - resources: { - requests: { - cpu: "2", - memory: "6Gi", - }, - limits: { - memory: "6Gi", - }, - }, - ports_: { - "grpc": { containerPort: 26257 }, - "http": { containerPort: 8080 }, - }, - livenessProbe: { - httpGet: { - path: "/health", - port: "http", - }, - initialDelaySeconds: 30, - periodSeconds: 5, - }, - readinessProbe: { - httpGet: { - path: "/health?ready=1", - port: "http", - }, - initialDelaySeconds: 10, - periodSeconds: 5, - failureThreshold: 2, - }, - volumeMounts: [ - { - name: "datadir", - mountPath: "/cockroach/cockroach-data", - }, - { - name: "certs", - mountPath: "/cockroach/cockroach-certs/node.crt", - subPath: "tls.crt", - }, - { - name: "certs", - mountPath: "/cockroach/cockroach-certs/node.key", - subPath: "tls.key", - }, - { - name: "certs", - mountPath: "/cockroach/cockroach-certs/ca.crt", - subPath: "ca.crt", - }, - ], - env_: { - "COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs", - }, - command: [ - "/bin/bash", - "-ecx", - "exec /cockroach/cockroach start --logtostderr --certs-dir /cockroach/cockroach-certs --advertise-host $(hostname -f) --http-addr 0.0.0.0 --cache 25% --max-sql-memory 25% --join " + std.join(",", cluster.hosts), - ], - }, - ], - terminationGracePeriodSeconds: 60, - volumes: [ - { - name: "datadir", - emptyDir: {}, - }, - { - name: "certs", - secret: { - secretName: cluster.pki.nodeCertificate.spec.secretName, - defaultMode: kube.parseOctal("400"), - }, - }, - ], }, - }, - podManagementPolicy: "Parallel", - updateStrategy: { - type: "RollingUpdate", - }, - }, - }, + } + } + for el in cluster.cfg.topology + ], initJob: 
kube.Job(cluster.name("init")) { metadata+: cluster.metadata, @@ -328,7 +329,7 @@ local cm = import "cert-manager.libsonnet"; command: [ "/bin/bash", "-ecx", - "/cockroach/cockroach init --host=" + cluster.hosts[0], + "/cockroach/cockroach init --host=%s.cluster.local:%d" % [cluster.servers[0].service.host, cluster.cfg.portServe], ], volumeMounts: [ { @@ -377,7 +378,8 @@ local cm = import "cert-manager.libsonnet"; image: cluster.cfg.image, env_: { "COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs", - "COCKROACH_HOST": cluster.hosts[0], + "COCKROACH_HOST": cluster.publicService.host, + "COCKROACH_PORT": std.toString(cluster.cfg.portServe), }, command: ["sleep", "2147483648"], //(FIXME) keep the client pod running indefinitely volumeMounts: [