forked from hswaw/hscloud
cluster/kube/lib/cockroachdb: use manual node pinning
We move away from the StatefulSet-based deployment to manually starting a deployment per intended node. This allows us to pin individual instances of Cockroach to particular nodes, so that they stay co-located with their data.
This commit is contained in:
parent
662a3cdcca
commit
e53e39a8be
1 changed files with 150 additions and 148 deletions
|
@ -1,9 +1,16 @@
|
|||
# Deploy a 3-node CockroachDB cluster in secure mode.
|
||||
# Deploy a CockroachDB cluster in secure mode.
|
||||
# This creates an N-node cluster based on a given static topology.
|
||||
|
||||
# Can be used either in own namespace or in an existing one:
|
||||
# crdb: cockroachdb.Cluster("q3kdb") {
|
||||
# cfg+: {
|
||||
# namespace: "q3k", // if not given, will create 'q3kdb' namespace
|
||||
# topology: [
|
||||
# { name: "a", node: "bc01n01.hswaw.net", ip: "185.236.240.35" },
|
||||
# { name: "b", node: "bc01n02.hswaw.net", ip: "185.236.240.36" },
|
||||
# { name: "c", node: "bc01n03.hswaw.net", ip: "185.236.240.37" },
|
||||
# ],
|
||||
# hostPath: "/var/db/cockroach-q3k",
|
||||
# },
|
||||
#},
|
||||
#
|
||||
|
@ -14,7 +21,7 @@
|
|||
# Then, you can create some users and databases for applications:
|
||||
# defaultdb> CREATE DATABASE wykop;
|
||||
# defaultdb> CREATE USER bialkov PASSWORD hackme;
|
||||
# defaultdb> GRANT ALL ON DATABASE wykop to bialkov;
|
||||
# defaultdb> GRANT ALL ON DATABASE wykop TO bialkov;
|
||||
#
|
||||
# You are then ready to access the database via the public service from your application.
|
||||
#
|
||||
|
@ -36,6 +43,13 @@ local cm = import "cert-manager.libsonnet";
|
|||
|
||||
cfg:: {
|
||||
image: "cockroachdb/cockroach:v19.1.0",
|
||||
|
||||
# Must be unique per cluster.
|
||||
portServe: 26257,
|
||||
portHttp: 8080,
|
||||
hostPath: error "hostPath must be defined",
|
||||
topology: error "topology must be defined",
|
||||
|
||||
namespace: null,
|
||||
ownNamespace: cluster.cfg.namespace == null,
|
||||
},
|
||||
|
@ -57,8 +71,6 @@ local cm = import "cert-manager.libsonnet";
|
|||
|
||||
name(suffix):: if cluster.cfg.ownNamespace then suffix else name + "-" + suffix,
|
||||
|
||||
hosts:: ["%s-%d.%s.cluster.local" % [cluster.statefulSet.metadata.name, n, cluster.internalService.host] for n in std.range(0, cluster.statefulSet.spec.replicas)],
|
||||
|
||||
pki: {
|
||||
selfSignedIssuer: cm.Issuer(cluster.name("selfsigned")) {
|
||||
metadata+: cluster.metadata,
|
||||
|
@ -99,14 +111,14 @@ local cm = import "cert-manager.libsonnet";
|
|||
},
|
||||
commonName: "node",
|
||||
dnsNames: [
|
||||
"localhost",
|
||||
"127.0.0.1",
|
||||
cluster.publicService.metadata.name,
|
||||
std.join(".", [cluster.publicService.metadata.name, cluster.metadata.namespace ]),
|
||||
cluster.publicService.host,
|
||||
std.join(".", [cluster.publicService.host, "cluster.local" ]),
|
||||
std.join(".", [ "*", cluster.internalService.metadata.name ]),
|
||||
std.join(".", [ "*", cluster.internalService.metadata.name, cluster.metadata.namespace ]),
|
||||
std.join(".", [ "*", cluster.internalService.host, "cluster.local" ]),
|
||||
std.join(".", [cluster.publicService.metadata.name, cluster.metadata.namespace ]),
|
||||
] + [
|
||||
"%s.cluster.local" % s.service.host
|
||||
for s in cluster.servers
|
||||
],
|
||||
},
|
||||
},
|
||||
|
@ -147,35 +159,15 @@ local cm = import "cert-manager.libsonnet";
|
|||
|
||||
publicService: kube.Service(cluster.name("public")) {
|
||||
metadata+: cluster.metadata,
|
||||
target_pod:: cluster.statefulSet.spec.template,
|
||||
target_pod:: cluster.servers[0].deploy.spec.template,
|
||||
spec+: {
|
||||
ports: [
|
||||
{ name: "grpc", port: 26257, targetPort: 26257 },
|
||||
{ name: "http", port: 8080, targetPort: 8080 },
|
||||
{ name: "grpc", port: cluster.cfg.portServe, targetPort: cluster.cfg.portServe },
|
||||
{ name: "http", port: cluster.cfg.portHttp, targetPort: cluster.cfg.portHttp },
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
internalService: kube.Service(cluster.name("internal")) {
|
||||
metadata+: cluster.metadata + {
|
||||
annotations+: {
|
||||
"service.alpha.kubernetes.io/tolerate-unready-endpoints": "true",
|
||||
"prometheus.io/scrape": "true",
|
||||
"prometheus.io/path": "_status/vars",
|
||||
"prometheus.io/port": "8080",
|
||||
},
|
||||
},
|
||||
target_pod:: cluster.statefulSet.spec.template,
|
||||
spec+: {
|
||||
ports: [
|
||||
{ name: "grpc", port: 26257, targetPort: 26257 },
|
||||
{ name: "http", port: 8080, targetPort: 8080 },
|
||||
],
|
||||
publishNotReadyAddresses: true,
|
||||
clusterIP: "None",
|
||||
},
|
||||
},
|
||||
|
||||
podDisruptionBudget: kube.PodDisruptionBudget(cluster.name("pod")) {
|
||||
metadata+: cluster.metadata,
|
||||
spec: {
|
||||
|
@ -188,128 +180,137 @@ local cm = import "cert-manager.libsonnet";
|
|||
},
|
||||
},
|
||||
|
||||
statefulSet: kube.StatefulSet(cluster.name("cockroachdb")) {
|
||||
metadata+: cluster.metadata {
|
||||
labels+: {
|
||||
"app.kubernetes.io/component": "server",
|
||||
},
|
||||
},
|
||||
spec+: {
|
||||
serviceName: cluster.internalService.metadata.name,
|
||||
replicas: 3,
|
||||
template: {
|
||||
metadata: cluster.statefulSet.metadata,
|
||||
servers: [
|
||||
{
|
||||
local server = self,
|
||||
service: kube.Service(cluster.name("server-" + el.name)) {
|
||||
metadata+: cluster.metadata + {
|
||||
annotations+: {
|
||||
"service.alpha.kubernetes.io/tolerate-unready-endpoints": "true",
|
||||
"prometheus.io/scrape": "true",
|
||||
"prometheus.io/path": "_status/vars",
|
||||
"prometheus.io/port": std.toString(cluster.cfg.portHttp),
|
||||
},
|
||||
},
|
||||
target_pod:: server.deploy.spec.template,
|
||||
spec+: {
|
||||
dnsPolicy: "ClusterFirst",
|
||||
serviceAccountName: cluster.serviceAccount.metadata.name,
|
||||
affinity: {
|
||||
podAntiAffinity: {
|
||||
preferredDuringSchedulingIgnoredDuringExecution: [
|
||||
{
|
||||
weight: 100,
|
||||
podAffinityTerm: {
|
||||
labelSelector: {
|
||||
matchExpressions: [
|
||||
{
|
||||
key: "app.kubernetes.io/component",
|
||||
operator: "In",
|
||||
values: [ "cockroachdb" ],
|
||||
},
|
||||
],
|
||||
ports: [
|
||||
{ name: "grpc", port: cluster.cfg.portServe, targetPort: cluster.cfg.portServe },
|
||||
{ name: "http", port: cluster.cfg.portHttp, targetPort: cluster.cfg.portHttp },
|
||||
],
|
||||
publishNotReadyAddresses: true,
|
||||
clusterIP: "None",
|
||||
},
|
||||
},
|
||||
deploy: kube.Deployment(cluster.name("server-" + el.name)) {
|
||||
metadata+: cluster.metadata {
|
||||
labels+: {
|
||||
"app.kubernetes.io/component": "server",
|
||||
"kubernetes.hackerspace.pl/cockroachdb-server": el.name,
|
||||
},
|
||||
},
|
||||
spec+: {
|
||||
template+: {
|
||||
metadata: server.deploy.metadata,
|
||||
spec+: {
|
||||
dnsPolicy: "ClusterFirst",
|
||||
serviceAccountName: cluster.serviceAccount.metadata.name,
|
||||
nodeSelector: {
|
||||
"kubernetes.io/hostname": el.node,
|
||||
},
|
||||
containers: [
|
||||
kube.Container("cockroachdb") {
|
||||
image: cluster.cfg.image,
|
||||
imagePullPolicy: "IfNotPresent",
|
||||
resources: {
|
||||
requests: {
|
||||
cpu: "2",
|
||||
memory: "6Gi",
|
||||
},
|
||||
topologyKey: "kubernetes.io/hostname",
|
||||
limits: {
|
||||
memory: "6Gi",
|
||||
},
|
||||
},
|
||||
ports_: {
|
||||
"grpc": { containerPort: cluster.cfg.portServe },
|
||||
"http": { containerPort: cluster.cfg.portHttp },
|
||||
},
|
||||
livenessProbe: {
|
||||
httpGet: {
|
||||
path: "/health",
|
||||
port: "http",
|
||||
},
|
||||
initialDelaySeconds: 30,
|
||||
periodSeconds: 5,
|
||||
},
|
||||
readinessProbe: {
|
||||
httpGet: {
|
||||
path: "/health?ready=1",
|
||||
port: "http",
|
||||
},
|
||||
initialDelaySeconds: 10,
|
||||
periodSeconds: 5,
|
||||
failureThreshold: 2,
|
||||
},
|
||||
volumeMounts: [
|
||||
{
|
||||
name: "datadir",
|
||||
mountPath: "/cockroach/cockroach-data",
|
||||
},
|
||||
{
|
||||
name: "certs",
|
||||
mountPath: "/cockroach/cockroach-certs/node.crt",
|
||||
subPath: "tls.crt",
|
||||
},
|
||||
{
|
||||
name: "certs",
|
||||
mountPath: "/cockroach/cockroach-certs/node.key",
|
||||
subPath: "tls.key",
|
||||
},
|
||||
{
|
||||
name: "certs",
|
||||
mountPath: "/cockroach/cockroach-certs/ca.crt",
|
||||
subPath: "ca.crt",
|
||||
},
|
||||
],
|
||||
env_: {
|
||||
"COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs",
|
||||
},
|
||||
command: [
|
||||
"/cockroach/cockroach", "start",
|
||||
"--logtostderr",
|
||||
"--certs-dir", "/cockroach/cockroach-certs",
|
||||
"--advertise-host", "%s.cluster.local" % server.service.host,
|
||||
"--cache", "25%", "--max-sql-memory", "25%",
|
||||
"--join", std.join(",", ["%s.cluster.local:%d" % [s.service.host, cluster.cfg.portServe] for s in cluster.servers]),
|
||||
"--listen-addr=0.0.0.0:%d" % cluster.cfg.portServe,
|
||||
"--http-addr=0.0.0.0:%d" % cluster.cfg.portHttp,
|
||||
],
|
||||
},
|
||||
],
|
||||
terminationGracePeriodSeconds: 60,
|
||||
volumes: [
|
||||
{
|
||||
name: "datadir",
|
||||
hostPath: {
|
||||
path: cluster.cfg.hostPath,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "certs",
|
||||
secret: {
|
||||
secretName: cluster.pki.nodeCertificate.spec.secretName,
|
||||
defaultMode: kube.parseOctal("400"),
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
containers: [
|
||||
kube.Container("cockroachdb") {
|
||||
image: cluster.cfg.image,
|
||||
imagePullPolicy: "IfNotPresent",
|
||||
resources: {
|
||||
requests: {
|
||||
cpu: "2",
|
||||
memory: "6Gi",
|
||||
},
|
||||
limits: {
|
||||
memory: "6Gi",
|
||||
},
|
||||
},
|
||||
ports_: {
|
||||
"grpc": { containerPort: 26257 },
|
||||
"http": { containerPort: 8080 },
|
||||
},
|
||||
livenessProbe: {
|
||||
httpGet: {
|
||||
path: "/health",
|
||||
port: "http",
|
||||
},
|
||||
initialDelaySeconds: 30,
|
||||
periodSeconds: 5,
|
||||
},
|
||||
readinessProbe: {
|
||||
httpGet: {
|
||||
path: "/health?ready=1",
|
||||
port: "http",
|
||||
},
|
||||
initialDelaySeconds: 10,
|
||||
periodSeconds: 5,
|
||||
failureThreshold: 2,
|
||||
},
|
||||
volumeMounts: [
|
||||
{
|
||||
name: "datadir",
|
||||
mountPath: "/cockroach/cockroach-data",
|
||||
},
|
||||
{
|
||||
name: "certs",
|
||||
mountPath: "/cockroach/cockroach-certs/node.crt",
|
||||
subPath: "tls.crt",
|
||||
},
|
||||
{
|
||||
name: "certs",
|
||||
mountPath: "/cockroach/cockroach-certs/node.key",
|
||||
subPath: "tls.key",
|
||||
},
|
||||
{
|
||||
name: "certs",
|
||||
mountPath: "/cockroach/cockroach-certs/ca.crt",
|
||||
subPath: "ca.crt",
|
||||
},
|
||||
],
|
||||
env_: {
|
||||
"COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs",
|
||||
},
|
||||
command: [
|
||||
"/bin/bash",
|
||||
"-ecx",
|
||||
"exec /cockroach/cockroach start --logtostderr --certs-dir /cockroach/cockroach-certs --advertise-host $(hostname -f) --http-addr 0.0.0.0 --cache 25% --max-sql-memory 25% --join " + std.join(",", cluster.hosts),
|
||||
],
|
||||
},
|
||||
],
|
||||
terminationGracePeriodSeconds: 60,
|
||||
volumes: [
|
||||
{
|
||||
name: "datadir",
|
||||
emptyDir: {},
|
||||
},
|
||||
{
|
||||
name: "certs",
|
||||
secret: {
|
||||
secretName: cluster.pki.nodeCertificate.spec.secretName,
|
||||
defaultMode: kube.parseOctal("400"),
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
podManagementPolicy: "Parallel",
|
||||
updateStrategy: {
|
||||
type: "RollingUpdate",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
for el in cluster.cfg.topology
|
||||
],
|
||||
|
||||
initJob: kube.Job(cluster.name("init")) {
|
||||
metadata+: cluster.metadata,
|
||||
|
@ -328,7 +329,7 @@ local cm = import "cert-manager.libsonnet";
|
|||
command: [
|
||||
"/bin/bash",
|
||||
"-ecx",
|
||||
"/cockroach/cockroach init --host=" + cluster.hosts[0],
|
||||
"/cockroach/cockroach init --host=%s.cluster.local:%d" % [cluster.servers[0].service.host, cluster.cfg.portServe],
|
||||
],
|
||||
volumeMounts: [
|
||||
{
|
||||
|
@ -377,7 +378,8 @@ local cm = import "cert-manager.libsonnet";
|
|||
image: cluster.cfg.image,
|
||||
env_: {
|
||||
"COCKROACH_CERTS_DIR": "/cockroach/cockroach-certs",
|
||||
"COCKROACH_HOST": cluster.hosts[0],
|
||||
"COCKROACH_HOST": cluster.publicService.host,
|
||||
"COCKROACH_PORT": std.toString(cluster.cfg.portServe),
|
||||
},
|
||||
command: ["sleep", "2147483648"], //(FIXME) keep the client pod running indefinitely
|
||||
volumeMounts: [
|
||||
|
|
Loading…
Reference in a new issue