forked from hswaw/hscloud
*: tear down ceph-waw2
It reached the stage of being crapped out so much that the OSDs' spurious IOPS killed the performance of disks colocated on the same M610 RAID controllers. This made etcd _very_ slow, to the point of churning through re-elections due to timeouts.

etcd/apiserver latencies, observe the difference at ~15:38:
https://object.ceph-waw3.hswaw.net/q3k-personal/4fbe8d4cfc8193cad307d487371b4e44358b931a7494aa88aff50b13fae9983c.png

I moved gerrit/* and matrix/appservice-irc-freenode PVCs to ceph-waw3 by hand. The rest were non-critical, so I removed them; they can be recovered from benji backups if needed.

Change-Id: Iffbe87aefc06d8324a82b958a579143b7dd9914c
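For the by-hand PVC migration mentioned above, the first step is just an inventory of which claims are still bound to the retired ceph-waw2 storage classes. A minimal sketch of that check, assuming the waw2 block pools are exposed as storage classes of the same name and using the kubernetes Python client (not part of this commit):

    # Sketch only: list PVCs still bound to the retired ceph-waw2 storage
    # classes, so nothing is lost before the cluster is torn down.
    # Assumes a working kubeconfig and `pip install kubernetes`.
    from kubernetes import client, config

    RETIRED = {"waw-hdd-redundant-2", "waw-hdd-paranoid-2", "waw-hdd-yolo-2"}

    config.load_kube_config()
    v1 = client.CoreV1Api()

    for pvc in v1.list_persistent_volume_claim_for_all_namespaces().items:
        sc = pvc.spec.storage_class_name
        if sc in RETIRED:
            print(f"{pvc.metadata.namespace}/{pvc.metadata.name} -> {sc}")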
parent 856b284e29
commit 61f978a0a0

3 changed files with 3 additions and 97 deletions
@@ -26,8 +26,7 @@ matrix {
         "irc-freenode": irc.AppServiceIrc("freenode") {
             cfg+: {
                 image: cfg.images.appserviceIRC,
-                // TODO(q3k): move this appservice to waw-hdd-redundant-3
-                storageClassName: "waw-hdd-paranoid-2",
+                storageClassName: "waw-hdd-redundant-3",
                 metadata: app.metadata("appservice-irc-freenode"),
                 // TODO(q3k): add labels to blessed nodes
                 nodeSelector: {
@@ -85,99 +85,7 @@ local rook = import "lib/rook.libsonnet";
     ceph: {
         // waw1 cluster - dead as of 2019/08/06, data corruption
-        // waw2 cluster: shitty 7200RPM 2.5" HDDs
-        waw2: rook.Cluster(k0.cluster.rook, "ceph-waw2") {
-            spec: {
-                // This cluster is quite broken. We're just keeping it around
-                // for the hell of it.
-                continueUpgradeAfterChecksEvenIfNotHealthy: true,
-                mon: {
-                    count: 3,
-                    allowMultiplePerNode: false,
-                },
-                storage: {
-                    useAllNodes: false,
-                    useAllDevices: false,
-                    config: {
-                        databaseSizeMB: "1024",
-                        journalSizeMB: "1024",
-                    },
-                    nodes: [
-                        {
-                            name: "bc01n01.hswaw.net",
-                            location: "rack=dcr01 chassis=bc01 host=bc01n01",
-                            devices: [ { name: "sda" } ],
-                        },
-                        {
-                            name: "bc01n02.hswaw.net",
-                            location: "rack=dcr01 chassis=bc01 host=bc01n02",
-                            devices: [ { name: "sda" } ],
-                        },
-                    ],
-                },
-            },
-            benji:: {
-                metadataStorageClass: "waw-hdd-paranoid-2",
-                encryptionPassword: std.split((importstr "../secrets/plain/k0-benji-encryption-password"), '\n')[0],
-                pools: [
-                    "waw-hdd-redundant-2",
-                    "waw-hdd-redundant-2-metadata",
-                    "waw-hdd-paranoid-2",
-                    "waw-hdd-yolo-2",
-                ],
-                s3Configuration: {
-                    awsAccessKeyId: "RPYZIROFXNLQVU2WJ4R3",
-                    awsSecretAccessKey: std.split((importstr "../secrets/plain/k0-benji-secret-access-key"), '\n')[0],
-                    bucketName: "benji-k0-backups",
-                    endpointUrl: "https://s3.eu-central-1.wasabisys.com/",
-                },
-            },
-        },
-        waw2Pools: {
-            // redundant block storage
-            blockRedundant: rook.ECBlockPool(k0.ceph.waw2, "waw-hdd-redundant-2") {
-                spec: {
-                    failureDomain: "host",
-                    erasureCoded: {
-                        dataChunks: 2,
-                        codingChunks: 1,
-                    },
-                },
-            },
-            // paranoid block storage (3 replicas)
-            blockParanoid: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-paranoid-2") {
-                spec: {
-                    failureDomain: "host",
-                    replicated: {
-                        size: 3,
-                    },
-                },
-            },
-            // yolo block storage (no replicas!)
-            blockYolo: rook.ReplicatedBlockPool(k0.ceph.waw2, "waw-hdd-yolo-2") {
-                spec: {
-                    failureDomain: "host",
-                    replicated: {
-                        size: 1,
-                    },
-                },
-            },
-            objectRedundant: rook.S3ObjectStore(k0.ceph.waw2, "waw-hdd-redundant-2-object") {
-                spec: {
-                    metadataPool: {
-                        failureDomain: "host",
-                        replicated: { size: 3 },
-                    },
-                    dataPool: {
-                        failureDomain: "host",
-                        erasureCoded: {
-                            dataChunks: 2,
-                            codingChunks: 1,
-                        },
-                    },
-                },
-            },
-        },
+        // waw2 cluster - dead as of 2021/01/22, torn down (horrible M610 RAID controllers are horrible)
 
         // waw3: 6TB SAS 3.5" HDDs
         waw3: rook.Cluster(k0.cluster.rook, "ceph-waw3") {
@@ -381,7 +289,6 @@ local rook = import "lib/rook.libsonnet";
         # bits they use, whatever those might be.
         # TODO(q3k): fix this?
         unnecessarilyInsecureNamespaces: [
-            policies.AllowNamespaceInsecure("ceph-waw2"),
             policies.AllowNamespaceInsecure("ceph-waw3"),
             policies.AllowNamespaceInsecure("matrix"),
             policies.AllowNamespaceInsecure("registry"),
@@ -11,7 +11,7 @@ local gerrit = import "gerrit.libsonnet";
     domain: "gerrit.hackerspace.pl",
     identity: "7b6244cf-e30b-42c5-ba91-c329ef4e6cf1",
 
-    storageClassName: "waw-hdd-paranoid-2",
+    storageClassName: "waw-hdd-redundant-3",
 
     secureSecret: "gerrit",
 },
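As an aside on the waw2Pools block removed above: the three block pools traded raw-space overhead against failure tolerance differently. A throwaway calculation of the standard Ceph overhead factors, for reference only (not part of this commit):

    # Raw bytes consumed per usable byte for the pool layouts removed above.
    # Erasure coding: (data + coding) / data chunks; replication: the size.
    def ec_overhead(data_chunks: int, coding_chunks: int) -> float:
        return (data_chunks + coding_chunks) / data_chunks

    print("waw-hdd-redundant-2 (EC 2+1):", ec_overhead(2, 1))  # 1.5x, survives loss of 1 host
    print("waw-hdd-paranoid-2 (3 replicas):", 3.0)             # 3.0x, survives loss of 2 hosts
    print("waw-hdd-yolo-2 (1 replica):", 1.0)                  # 1.0x, no redundancy at all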