From cfc0496266d38b3535a6a0e0516ee7e136ef7ae0 Mon Sep 17 00:00:00 2001
From: Serge Bazanski
Date: Sat, 10 Oct 2020 15:57:12 +0200
Subject: [PATCH] ops/monitoring: scrape apiserver, scheduler, and
 controller-manager

These get scraped by public IP address, which is retrieved via service
discovery in Prometheus (using the endpoints role on the
default/kubernetes service).

Also, a drive-by fix to the cluster Prometheus resources - the default
configuration wants at least 3GB of physical memory.

Change-Id: I1eedb19051f62b40613f69e5f0f736d5958acf42
---
 ops/monitoring/lib/cluster.libsonnet | 156 +++++++++++++++++----------
 1 file changed, 100 insertions(+), 56 deletions(-)

diff --git a/ops/monitoring/lib/cluster.libsonnet b/ops/monitoring/lib/cluster.libsonnet
index 511d4262..00aa7922 100644
--- a/ops/monitoring/lib/cluster.libsonnet
+++ b/ops/monitoring/lib/cluster.libsonnet
@@ -60,63 +60,106 @@ local kube = import "../../../kube/kube.libsonnet";
         bearer_token_file: "/var/run/secrets/kubernetes.io/serviceaccount/token",
       },

-      scrape_configs: [
-        // When scraping node-based metrics (ie. node and cadvisor metrics) we contact
-        // the metrics endpoints on the kubelet via the API server. This is done by
-        // relabeling _address__ and __metrics_path__ to point at the k8s API server,
-        // and at the API server proxy path to reach a node's metrics endpoint.
-        //
-        // This approach was lifted from the prometheus examples for Kubernetes, and
-        // while the benefits outlined there do not matter that much to us (our
-        // kubelets listen on public addresses, anyway), we still enjoy this approach
-        // for the fact that we don't have to hardcode the kubelet TLS port.
-        //
-        // https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml
-        //
-        // When contacting the API server, we hardcode the 'hswaw.net' DNS suffix as
-        // our API server's TLS certificate only has a CN/SAN for its full FQDN, not
-        // the .svc.cluster.local shorthand (see //cluster/clustercfg:clustercfg.py).
-        // Scrape Kubernetes node metrics via apiserver. This emites kube_node_* metrics.
-        kubeScrapeConfig("cluster_node_metrics", "node") {
-          relabel_configs: [
-            {
-              action: "labelmap",
-              regex: "__meta_kubernetes_node_label_(.+)",
-            },
-            {
-              action: "replace",
-              target_label: "__address__",
-              replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
-            },
-            {
-              target_label: "__metrics_path__",
-              source_labels: ["__meta_kubernetes_node_name"],
-              regex: "(.+)",
-              replacement: "/api/v1/nodes/${1}/proxy/metrics",
-            },
-          ],
+      // When scraping node-based metrics (i.e. node and cadvisor metrics) we contact
+      // the metrics endpoints on the kubelet via the API server. This is done by
+      // relabeling __address__ and __metrics_path__ to point at the k8s API server,
+      // and at the API server proxy path to reach a node's metrics endpoint.
+      //
+      // This approach was lifted from the Prometheus examples for Kubernetes, and
+      // while the benefits outlined there do not matter that much to us (our
+      // kubelets listen on public addresses anyway), we still prefer it because we
+      // don't have to hardcode the kubelet TLS port.
+      //
+      // https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml
+      //
+      // When contacting the API server, we hardcode the 'hswaw.net' DNS suffix as
+      // our API server's TLS certificate only has a CN/SAN for its full FQDN, not
+      // the .svc.cluster.local shorthand (see //cluster/clustercfg:clustercfg.py).
+      local kubeScrapeNodeMetrics = function(name, path) kubeScrapeConfig(name, "node") {
+        relabel_configs: [
+          {
+            action: "labelmap",
+            regex: "__meta_kubernetes_node_label_(.+)",
+          },
+          {
+            action: "replace",
+            target_label: "__address__",
+            replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name],
+          },
+          {
+            target_label: "__metrics_path__",
+            source_labels: ["__meta_kubernetes_node_name"],
+            regex: "(.+)",
+            replacement: "/api/v1/nodes/${1}/proxy" + path,
+          },
+        ],
+      },
+
+      // When scraping API server-colocated metrics (i.e. metrics from NixOS services
+      // running alongside apiserver instances), we contact the metrics endpoints
+      // directly, over the nodes' public IP addresses and an external port. The node
+      // IP addresses are discovered via Prometheus' Kubernetes endpoint discovery,
+      // which selects all endpoints of the default/kubernetes service. This service
+      // is backed by apiserver instances on public IP addresses. We then rewrite the
+      // discovered port to the port of the service we're actually interested in.
+      local kubeScrapeAPIServerColocated = function(name, port) kubeScrapeConfig(name, "endpoints") {
+        relabel_configs: [
+          // Select only endpoints that back the default/kubernetes service. These are
+          // all public IP addresses of nodes that run the API server.
+          {
+            action: "keep",
+            regex: "default;kubernetes;https",
+            source_labels: [
+              "__meta_kubernetes_namespace",
+              "__meta_kubernetes_service_name",
+              "__meta_kubernetes_endpoint_port_name",
+            ],
+          },
+        ] + (if port == 4001 then [] else [
+          // Replace the endpoint port with the requested port, unless the requested
+          // port is the apiserver's port 4001, which is the one these endpoints carry
+          // by default.
+          {
+            action: "replace",
+            regex: "([^:]+):.+",
+            replacement: "$1:%d" % [port],
+            source_labels: [
+              "__address__",
+            ],
+            target_label: "__address__",
+          },
+        ]),
+        // We disable server-side TLS certificate verification: all apiserver-colocated
+        // services run with TLS certificates that do not have the right IP address
+        // SAN, and we can't override the TLS ServerName for a scrape target [1], so
+        // the only two choices we are left with are:
+        //  1) re-issue the relevant certificates with IP address SANs that allow
+        //     access by IP address.
+        //  2) disable TLS verification.
+        // We choose 2), knowing that if someone manages to hijack a target IP address
+        // they can end up stealing our bearer token and impersonating the service
+        // account with which Prometheus is running. In the long term, we hope for [1]
+        // to be resolved.
+        //
+        // TODO(q3k): revisit this once [1] gets fixed.
+        // [1] - https://github.com/prometheus/prometheus/issues/4827
+        tls_config: {
+          insecure_skip_verify: true,
+        },
+      },
+
+      scrape_configs: [
+        /// Scrape per-node metrics, proxied via the apiserver.
+        // Scrape Kubernetes node metrics via apiserver. This emits kube_node_* metrics.
+        kubeScrapeNodeMetrics("cluster_node_metrics", "/metrics"),
         // Scrape Kubernetes node cadvisor metrics via apiserver. This emits container_* metrics.
- kubeScrapeConfig("cluster_cadvisor_metrics", "node") { - relabel_configs: [ - { - action: "labelmap", - regex: "__meta_kubernetes_node_label_(.+)", - }, - { - action: "replace", - target_label: "__address__", - replacement: "kubernetes.default.svc.%s.hswaw.net:443" % [cluster.cfg.name], - }, - { - target_label: "__metrics_path__", - source_labels: ["__meta_kubernetes_node_name"], - regex: "(.+)", - replacement: "/api/v1/nodes/${1}/proxy/metrics/cadvisor", - }, - ], - }, + kubeScrapeNodeMetrics("cluster_cadvisor_metrics", "/metrics/cadvisor"), + + /// Scape apiserver-colocated ('master node') metrics, over nodes' public IP addresses. + /// (currently all nodes are 'master' nodes) + // Scrape Kubernetes apiserver metrics. + kubeScrapeAPIServerColocated("cluster_apiserver_metrics", 4001), + // Scrape Kubernetes controller-manager metrics. + kubeScrapeAPIServerColocated("cluster_controllermanager_metrics", 4003), + // Scrape Kubernetes scheduler metrics. + kubeScrapeAPIServerColocated("cluster_scheduler_metrics", 4005), ], remote_write: [ @@ -152,6 +195,7 @@ local kube = import "../../../kube/kube.libsonnet"; { nonResourceURLs: ["/metrics"], verbs: ["get"], }, // Allow to access node details for discovery. { apiGroups: [""], resources: ["nodes"], verbs: ["list", "watch", "get"], }, + { apiGroups: [""], resources: ["endpoints", "services", "pods"], verbs: ["list", "watch", "get"], }, // Allow to proxy to bare node HTTP to access per-node metrics endpoints. { apiGroups: [""], resources: ["nodes/proxy"], verbs: ["get"], }, ], @@ -183,11 +227,11 @@ local kube = import "../../../kube/kube.libsonnet"; ], resources: { requests: { - memory: "256Mi", + memory: "3Gi", cpu: "100m", }, limits: { - memory: "1Gi", + memory: "3Gi", cpu: "1", }, },