1
0
Fork 0

monitoring: global: implement

This creates a basic Global instance, running Victoria Metrics on k0.

Change-Id: Ib03003213d79b41cc54efe40cd2c4837f652c0f4
master
q3k 2020-08-24 21:00:56 +02:00 committed by q3k
parent 2e001e5046
commit 363bf4f341
8 changed files with 332 additions and 16 deletions

41
ops/monitoring/README.md Normal file
View File

@ -0,0 +1,41 @@
hscloud monitoring
==================
Quick links
-----------
- *Old Global Dashboard*: [monitoring.hackerspace.pl](https://monitoring.hackerspace.pl) - old monitoring system, unrelated to this one, configured using Chef at management.hackerspace.pl (long since dead). This setup is supposed to replace it.
Architecture
------------
The hscloud monitoring solution is two-tiered:
- at the *global* tier we run metrics aggregation, long-term storage, dashboard and alerting.
- at the *agent* tier we collect metrics from various sources (possibly even lower-tiered agents).
All agent-tier agents send metrics to all global instances.
        .--------. .--------.                  '.
        | global | | global |                    > - global tier
        '--------' '--------'                  .'    (contains 'global instances')
          |  '---. .---'  |
          |       X       |
          |  .---' '---.  |
          |  |         |  |
    .--------------.  .--------------------.   '.
    | cluster      |  | hswaw-proxy        |    |
    | k0.hswaw.net |  | waw.hackerspace.pl |     > - agent tier
    '--------------'  '--------------------'   .'    (contains 'agents')
Agent - cluster
---------------
Cluster agents are responsible for collecting Kubernetes cluster metrics. They run a prometheus server that scrapes kubelet/cadvisor/... metrics and sends them off to global instances.
Global Instances
----------------
Global instances run Victoria Metrics, ingest metrics from all agents, and perform long-term storage. In the future they will also run Grafana and AlertManager.

View File

@ -1,11 +1,39 @@
local lib = import "lib.libsonnet";
local cluster = import "lib/cluster.libsonnet";
local global = import "lib/global.libsonnet";
// Monitoring tiers set up on k0. See README for architectural background.
{
cluster: lib.Cluster("k0") {
cfg+: {
storageClasses+: {
prometheus: "waw-hdd-redundant-3",
},
local k0 = self,
local cfg = {
storageClasses+: {
prometheus: "waw-hdd-redundant-3",
victoria: "waw-hdd-redundant-3",
},
},
// Cluster tier - prometheus.
cluster: cluster.Cluster("k0") {
cfg+: cfg {
username: "cluster-k0",
upstreams: [
{ password: std.split(importstr "secrets/plain/global-agent-cluster-k0", "\n")[0], remote: k0.global.internalIngestURL },
],
},
},
// Global tier - victoria metrics.
global: global.Global("k0") {
cfg+: cfg {
hosts: {
globalAPI: "monitoring-global-api.k0.hswaw.net",
},
agents: [
// Ingestion from k0 cluster tier.
{ username: k0.cluster.cfg.username, password: std.split(importstr "secrets/plain/global-agent-cluster-k0", "\n")[0], },
// Access from q3k's test Grafana.
{ username: "grafana", password: std.split(importstr "secrets/plain/global-agent-grafana", "\n")[0], },
],
},
},
}

View File

@ -1,5 +0,0 @@
local cluster = import "lib/cluster.libsonnet";
{
Cluster: cluster.Cluster,
}

View File

@ -2,8 +2,10 @@ local kube = import "../../../kube/kube.libsonnet";
{
// Cluster sets up all cluster-specific monitoring resources in their own namespace.
//
// Currently this consists of a prometheus server that scrapes k8s nodes for kubelet
// and cAdvisor metrics.
// and cAdvisor metrics, and possibly ships over metrics to the global tier via set
// upstreams.
Cluster(name):: {
local cluster = self,
local cfg = cluster.cfg,
@ -18,6 +20,17 @@ local kube = import "../../../kube/kube.libsonnet";
storageClasses: {
prometheus: error "storageClasses.prometheus must be set",
},
// Username used to authenticate to upstreams.
username: error "username must be set",
// Global tier upstreams that this cluster should ship metrics off to.
// List of:
// {
// remote: URL of upstream
// password: password used to authenticate, in conjunction with cfg.username.
// }
upstreams: [],
},
namespace: kube.Namespace(cfg.namespace),
@ -105,6 +118,17 @@ local kube = import "../../../kube/kube.libsonnet";
],
},
],
remote_write: [
{
url: u.remote,
basic_auth: {
username: cluster.cfg.username,
password: u.password,
},
}
for u in cluster.cfg.upstreams
],
},
configmap: kube.ConfigMap("prometheus-cluster") {
@ -152,9 +176,7 @@ local kube = import "../../../kube/kube.libsonnet";
"/bin/prometheus",
"--config.file=/etc/prometheus/prometheus.yml",
"--storage.tsdb.path=/prometheus",
# TODO(q3k): reduce this once we have a long-term storage
# solution.
"--storage.tsdb.retention.time=120d",
"--storage.tsdb.retention.size=10GB",
"--web.console.libraries=/usr/share/prometheus/console_libraries",
"--web.console.templates=/usr/share/prometheus/consoles",
"--web.enable-lifecycle",
@ -198,7 +220,7 @@ local kube = import "../../../kube/kube.libsonnet";
accessModes: ["ReadWriteOnce"],
resources: {
requests: {
storage: "32Gi",
storage: "16Gi",
},
},
},

View File

@ -0,0 +1,149 @@
local kube = import "../../../kube/kube.libsonnet";
{
// Global sets up a global tier instance of the hscloud monitoring infrastructure.
//
// This currently consists of Victoria Metrics, to which the agent tier sends metrics data via
// the prometheus remote_write protocol.
// Victoria Metrics is here used as a long-term storage solution. However, right now, it
// just keeps data locally on disk. In the future, S3 snapshots/backups should be introduced.
//
// Parameters:
//   name: short name of this instance. It is stored as cfg.name and used to derive the
//         default namespace name.
//
// Users are expected to customize the instance by mixing into the hidden `cfg` field
// (cfg+: { ... }); hosts.globalAPI and storageClasses.victoria have no defaults and raise an
// error if they are evaluated without being set.
Global(name):: {
local global = self,
local cfg = global.cfg,
// Instance configuration. Hidden (::) so that it is not emitted as part of the output.
cfg:: {
name: name,
// Namespace that all resources of this instance are placed in.
namespace: "monitoring-global-%s" % [cfg.name],
// Container images (with pinned tags) used by the victoria deployment below.
images: {
victoria: "victoriametrics/victoria-metrics:v1.40.0",
vmauth: "victoriametrics/vmauth:v1.40.0",
},
hosts: {
// DNS hostname that this global tier will use. Ingress will run under it.
globalAPI: error "hosts.globalAPI must be set",
},
storageClasses: {
// Storage class used for main data retention.
victoria: error "storageClasses.victoria must be set",
},
// A list of agents that will push metrics to this instance.
// List of:
// {
// username: the username that the agent will authenticate with
// password: the password that the agent will authenticate with
// }
// Each entry becomes one user in the vmauth configuration (see victoria.authSecret).
agents: [],
},
// Generated URLs that agents should use to ship metrics over. Both require HTTP basic
// auth, configured via cfg.agents.
// The internal URL should be used for agents colocated in the same Kubernetes cluster.
// NOTE(review): host_colon_port is provided by kube.libsonnet's Service; presumably it is the
// in-cluster DNS name and port of victoria.serviceAPI - confirm.
internalIngestURL:: "http://%s/api/v1/write" % [global.victoria.serviceAPI.host_colon_port],
// The global URL should be used for agents sending data over the internet.
globalIngestURL:: "https://%s/api/v1/write" % [cfg.hosts.globalAPI],
namespace: kube.Namespace(cfg.namespace),
// Shorthand used below to place resources into this instance's namespace.
local ns = global.namespace,
// All resources making up the Victoria Metrics deployment: storage, auth configuration,
// the deployment itself, and the service/ingress exposing it.
victoria: {
local victoria = self,
// Persistent storage for the victoria container, mounted at /victoria-metrics-data.
pvc: ns.Contain(kube.PersistentVolumeClaim("victoria-data")) {
spec+: {
storageClassName: cfg.storageClasses.victoria,
accessModes: ["ReadWriteOnce"],
resources: {
requests: {
storage: "64Gi",
},
},
},
},
// Configuration file for vmauth, generated from cfg.agents. It is rendered as JSON
// (plus a trailing newline), base64-encoded and stored under the config.yaml key.
authSecret: ns.Contain(kube.Secret("vmauth")) {
data+: {
"config.yaml": std.base64(std.manifestJson({
users: [
{
username: a.username,
password: a.password,
// Every user is forwarded to the same backend.
// NOTE(review): presumably this is the victoria container in the same
// pod; no port is declared on that container here - confirm 8428.
url_prefix: "http://localhost:8428",
}
for a in cfg.agents
],
}) + "\n")
},
},
// Single deployment running two containers side by side: victoria (storage) and
// vmauth (authenticating frontend, the only container with a declared port).
deploy: ns.Contain(kube.Deployment("victoria")) {
spec+: {
template+: {
spec+: {
containers_: {
// Victoria Metrics itself, with the data PVC mounted.
default: kube.Container("default") {
image: cfg.images.victoria,
volumeMounts_: {
data: { mountPath: "/victoria-metrics-data", },
},
},
// vmauth, reading its user list from the mounted authSecret.
vmauth: kube.Container("vmauth") {
image: cfg.images.vmauth,
command: [
"/vmauth-prod",
"-auth.config", "/mnt/secret/config.yaml",
],
volumeMounts_: {
secret: { mountPath: "/mnt/secret", },
},
ports_: {
api: { containerPort: 8427 }
},
}
},
volumes_: {
data: kube.PersistentVolumeClaimVolume(victoria.pvc),
secret: kube.SecretVolume(victoria.authSecret),
},
},
},
},
},
// Cluster-internal service exposing the vmauth port (8427) of the deployment's pods.
// internalIngestURL is derived from this service.
serviceAPI: ns.Contain(kube.Service("victoria-api")) {
target_pod: victoria.deploy.spec.template,
spec+: {
ports: [
{ name: "api", port: 8427, targetPort: 8427, protocol: "TCP" },
],
type: "ClusterIP",
},
},
// Ingress serving cfg.hosts.globalAPI over TLS and routing all paths to serviceAPI.
// The annotations request a certificate from the letsencrypt-prod cluster issuer.
ingressAPI: ns.Contain(kube.Ingress("victoria-api")) {
metadata+: {
annotations+: {
"kubernetes.io/tls-acme": "true",
"certmanager.k8s.io/cluster-issuer": "letsencrypt-prod",
},
},
spec+: {
tls: [
{ hosts: [cfg.hosts.globalAPI], secretName: "ingress-tls" },
],
rules: [
{
host: cfg.hosts.globalAPI,
http: {
paths: [ { path: "/", backend: { serviceName: victoria.serviceAPI.metadata.name, servicePort: 8427 } }, ],
},
}
],
},
},
},
}
}

View File

@ -0,0 +1,40 @@
-----BEGIN PGP MESSAGE-----
hQEMAzhuiT4RC8VbAQgAiZLuysTzxY8VM1wOAC7Hb0/3dHh0/5cFG1nOC6svnVt4
NLZG0K+9uSuku76N/TZak1lk0pieeW9PE+FBDAAjUhGKS1/0qvZmG2Y5T3qs7pYf
0Zv68hKix88bEfK7yfF/t68cYB1F2ms/4Y5tCBuW3av8MI7XQifWdnwgokxbE6xY
yhGpII6zZemfA+kuMo4BRsyy2Z1xsKo7Ah64hQQUQFXwzr+i4hzpp2AeVlWAcFNj
IlHPxA02ZcBCtjz2DLShFN2s8WBenboM88eUfeKpRAbMMfGcycmpIt7uf6pZ1UJa
viTnfV1juqyXaMLECOBNYBhlMagjRIZ0CbM/5mn3Q4UBDANcG2tp6fXqvgEIAK2M
pbeD3JpNE6pRvQsAHuKObQ+Bm82CxZg2uS9OPwNm6l7ESROpCnTRU8ahHIJO2f1d
IMXzLO4M6QMb5FpAl2ixsT/SeZ9Z8NSxcl1ndByTRPQ3wSNfCV8wW7tXIWHzv1El
pjBRowEbitwuwFgfgk86lYdYLKRPefAPr4fFNQV6aGLSWdVMo6vdR/C78xDivduy
A79Fu64+nsKgOrKHkcxn4YyhFDTOt7avpCX3xAFDWoN7w3W5iQ/EQk+6SVnfsqjo
IqTcxcS1o1TxpEjyoBPgpAERFEJEjIE2Dpo8E5UjJDLHMtSJMMqrAW7nLZJimI+c
DSY83h5VtCzAnvjIXYeFAgwDodoT8VqRl4UBD/4ujIoPDkIZ/dLGbiwtlwZz4giY
2LbfxHq2nkwy3+V6fbAxp+GyK4lB4XiZia2lMeWk5UECK8z9fpAhGvrFaSwjXkvn
ig8LY4WFW8uxjtilJXxPBIqkl9EMyEZJaBFQ3d9icE2ZgPV7peXtZBYgZVD4fY7E
Pxi2CTRrILr628Vqpbo0GdB88NdDd24wzkec6rVVV8WktWbyNXvzzJE6BbkcdBj6
DyfG55SKSQAjYfC3b8LYtcCZDXiidFMflDXraVuaoOWHKuAb8Mwhrz0TwdTVSH0G
xcnANQjBXf+TNfm8nrfLLSnmyili4qOgEuRQKeSJR+aiR3kDQhV0exd/jAJlcTcF
+QuLWEgpj95W+TD9s/EaVjRBIWs2TFvsUYlU+HZap0GzFiBQQubpK/EgJsh6Xz1/
3mjDxG5vv/Wdti0ko3oul9koS24eNK07lwM6g/GwtLhT48h/Db/M/9/Vadx+T6KW
fEKTdYyn438hOVKqrKwIRLp98e++VbLIg7pQsH0YOFK0QMFk6N7IbYRSEBnuDD+F
W3VJ4wiP0dwM2LttHwFTapReaDkORYv70rvb3mplp4kCLF7AvUJJQaU1cncuLdwd
Sj3iDu8s0lf3lM0Y+SlB1xDkzHrOGmcC0I4JVH+padBDKCpAn/8IbFmlaiV1Xjtd
KTwB6+NcMtmqX+ObNYUCDAPiA8lOXOuz7wEP/A9JFmiFZ36mPnfjbA7OHz6U3zOT
71iHwJDJXXG4g83WKtOLTYaNAizjXVz7wInWbwigTK2uD38lzOfArbU7UaAP38yS
89xNff5YOQASO71AJutoulUbA43TFF0gVqtcqsYJO7Cx+DSrBwhHXtFsppOVeMsH
9CmxVskPnwyZGG1yAJJ+EnD+y+SGmUh97EyWH+UKNZ4fiXVnwNt1ffY6vFYz61d8
AferfbvKy/9UN2gxn+QDVjDdyf5Qk14t6ljdBnO/RVbZNpU7pIRepttUFlCr87a+
SZR+WGPOaedzq/8nu3rABEqOxyLd0W0c9eOFwAtLxszjQ3yakhDseH2xlpeqa5g1
YzPFL79ywFDLjdfnPju8OBROEtyZD2mNrCqR846xIjPERjMhDYSS7DUI73iC7hrW
5hzfNj7ky1l0mYg3lfIdbtDrQO/HjnsYL3JA8WTcNHVEx3EpgEpQCz7g2TZAfpNs
E08pkn8hLuNg+PH1RvTFLTVflclfZsnTPu7np7TTE8O1OaA8qUG/P1nAXX+wX93a
a2uHQ39I3VZqPA7eRK+Gp6lDSBP1pbZbKz5tV/9glVPXKXY+bDamlWE7kgXbGOsY
zILK+jN9GFad4/gg9b7Upw5EdphnyAoob2SbrKbFN5ALyfFbgG+wFhFdb2oS7tCk
eLx5zEc+aQReFEQ30nMBLEWI+DfbN4nby1Ccp3bcOQvnSr9a0XJYSgFtcVve9aWw
/tYpNn0+fo1M1J+93UPjfjL9/ApDH3dDaS6L6WR8jn2EHALHUbwNuShBnDcCAlQe
/ZVaR2LGJprPHURChG2pognfRZhp+YK06diTwUHtyHir
=e/F1
-----END PGP MESSAGE-----

View File

@ -0,0 +1,40 @@
-----BEGIN PGP MESSAGE-----
hQEMAzhuiT4RC8VbAQf+NMwFZv9tVcUOo13hi7r2Z5V294dseTFk+q3nE//ZmWx6
6LL70Ggdc3etozf9w6uriQG0wbrfy7XwOYkpFJYaJb2gut0xxG/Fw221ZGR8elpe
79FzveD0FUZK+UdixXMiqYQOiwUK5+RbjhKN+R3WjG5mrClHDeCG5WrXFPvT9wOX
dA3ED/ZczrNxvSbKeE1imFoCeudrC9/zo/CRmb0BHrIoOEe+vCe/MzN0s/fkiq1k
RyZZxJ0M6/oudlcexyaYJcTdBTW1ZMNmmZ9lWsBjmf5kTKGu1tDUuMU9RBJmtyYn
8euaTwwCOfZjz6vdKoGer07ftEyfbjDGuU6zOtN9Z4UBDANcG2tp6fXqvgEH/i++
MbBnFbCOajtIN2xN8P6WiP6RjPmUKaKLJCltZHqPPYuULFWTa8uBIbfqVjtgFfoE
43eBgP+D1EQooq6O7NqWcoCY7LphwKx///oWsmeuiRy+wwQOGMV45tF31n944P1U
qZGhik8n7pxLkNaC5ohaucQJeaDSi7GuMATzBWdGY6lZkaNLUfrPmKXu9tyIaA4u
b80gPvFWc/9PgnS9rAcVPA7/8Il53EVJJsYK2/S7nDFKRTJfThId4cvvAFUXkuwW
hM6FEFcJdSW90qbhBGCwr3yfvSb3Je0k7gYrg9iQTLxRpIbG3Mbz9irEKno3alGe
8FvuAdmVn1AVyxNomT+FAgwDodoT8VqRl4UBD/0bGb+8AF3mTmEMTHAPPLUIxhu+
ihb4Po6OxQ7u/UCMSHXhFQCqb4ytK2JsG2UhcIiYrQrZMVGQh3rGcfZESHpX1/Ol
rTwkjUZSSnek8M1hbEkS7PU2rePsXt/O07+zenqMO3pMeVsX+VLEGXRS6KZ9WXsc
X09iLyqBErgntaM7otMSZVSPV7VFEaIoVdPe4NZxudDMedeA0hr1BneaVUNVjMtQ
UE6ZVxzFSoqMnfsJY9/dn/uhHdv7qOhzw0ABINmDybI6IWNEaGzzSJC2HcHZjocY
h2Se4mzxjOz9X4CG28h8b9jFHRtSe4OiSAQYxwtZNxGR6kCt1PfP20oemPBg+LXF
T6+ledT5nkkaoztl5EOxKoh20BfbNOR2AWbPYuRLJ7OKF/dFDJ3PndPwSjEvPZDY
xpvHszqVlcMpleqM/iQILD33Nzz9RhtSZHQiGYuZsak4aeUWsz/3a6JhtcliK4Do
CyG3J1wVf0a+jsXlDr0M50qf+aY7k76zTqtfXcPSKypMeP0yZaYMPCnzHvFpj39u
u3NGOEiwv1WXMrUn59SuL9X6aP5s4D455E5JDuOFKrndN78CKqsCTPkNGOCU5F+f
B25lXY0yF22RLiHbWiAGcM+u4roi7qA8HWYly6lqOl3imk3D3NJP+ENGyoCxgfcQ
GIrl8z5fyE5GtJUgAoUCDAPiA8lOXOuz7wEP/3ZmA1njB/F1nu/vafx90O0A0Mmr
J7EvveK89W2P5JsZjEX/sVSurx1kY4U1Lofe00jdbsQNtfQ33/K2zr0Vb5G3VQxL
QnsksGO9MjywwVzpspuS+gQE2P6VU5YjpHXA15SJXkJV/SDvxoPSyPt5x3m0nU+9
aj43mTgKdvTwSeDoEHzt54KRY01HOugpmmY/TZ5Wkeam2vNsCaSEYAdDTaSEG6j2
OfxQf8X/A+7RDwyoVpjDg4LVAyHmcomWBeudEH4GkR+oGC7YQ39QEb4TA9h0ufUO
2N1XWIf+FraCFX5x9IeKoe9ZyInz8I24lRMRHHu1RGrluGHCjflVzuIfOnasy2yU
CX9EhvoL2IxfCLdY2XoBcrpCcTr+FKu4/5n4P3WN6TSWnQlvCypgfJ7zkjGWytQW
cChiouYvYLcW2q1opZgIu3J1i7QnS6qHzzhK3eejBF0DZinxs+q7cylVVAzrq5FF
p6+v6OXlZRbsIoBJg0kKKfUwqzoTwEFPvPMvoIxCmwSfQmlnemeXrXdahqbZXgIO
a4jMrlene6Asr7xVZ+9siv5plPogmvWco/950KmKlXOUEg439nADHzhTYPmai5YA
i9inb9B1sAcpbYQejUwnIx+W11qsyE2PAXHdj/mvVm1fzO/VJ+yGHtnKwfGtHS8x
v7vq6yCM2p4HPQLC0m4BFIvHf29iKhYDxjj3F0d//VEez/43+79bDDakmS5StD6P
DykfoaYRBijgUfhG4a4UbMBbpIuukwBI0EQoy+3Sca6Rx1Da5lIPXuMbb/N/c4rp
xL1OhySubTvzg2yTCfUoEuMWZFL1rwgT9lrKVg==
=Do8R
-----END PGP MESSAGE-----

View File

@ -0,0 +1 @@
*