From 35d437883b6a02fd73c0e5258d40bcf065ce3065 Mon Sep 17 00:00:00 2001
From: Sergiusz Bazanski
Date: Wed, 24 Jun 2020 22:18:13 +0200
Subject: [PATCH] kube/policies: implement mostlysecure

This now allows to run apt and should allow to run most upstream docker
images. In return, we prohibit some mildly sketchy stuff. But this is
safe enough for project namespaces with limited administrative access.

We should still get gvisor sooner than later...

Change-Id: Ida5ccfae440bacb6f3fd55dcc34ca0addfddd5ae
---
Review notes (free-form area between `---` and the diff; not part of the
commit message):
 * Line breaks and leading whitespace were reconstructed from a
   whitespace-collapsed copy of this patch; 4-space indentation is assumed.
   Verify against the tree or apply with `git apply --ignore-whitespace`.
 * The From: header carries no e-mail address in this copy.
 * The keep-list passed to std.setDiff is wrapped in std.set(), so the
   result no longer depends on the literal being hand-sorted. The post-image
   therefore differs from blob 0f8d3d85.

 kube/policies.libsonnet | 36 ++++++++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)

diff --git a/kube/policies.libsonnet b/kube/policies.libsonnet
index 242c00c4..0f8d3d85 100644
--- a/kube/policies.libsonnet
+++ b/kube/policies.libsonnet
@@ -7,6 +7,12 @@ local kube = import "kube.libsonnet";
     policyNameAllowSecure: "policy:allow-secure",
     policyNameAllowMostlySecure: "policy:allow-mostlysecure",
 
+    # egrep 'define CAP_[A-Z_]+.+[0-9]+$' include/linux/capability.h | cut -d' ' -f 2 | tr '\n' ','
+    local allCapsStr = 'CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_DAC_READ_SEARCH,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_SETGID,CAP_SETUID,CAP_SETPCAP,CAP_LINUX_IMMUTABLE,CAP_NET_BIND_SERVICE,CAP_NET_BROADCAST,CAP_NET_ADMIN,CAP_NET_RAW,CAP_IPC_LOCK,CAP_IPC_OWNER,CAP_SYS_MODULE,CAP_SYS_RAWIO,CAP_SYS_CHROOT,CAP_SYS_PTRACE,CAP_SYS_PACCT,CAP_SYS_ADMIN,CAP_SYS_BOOT,CAP_SYS_NICE,CAP_SYS_RESOURCE,CAP_SYS_TIME,CAP_SYS_TTY_CONFIG,CAP_MKNOD,CAP_LEASE,CAP_AUDIT_WRITE,CAP_AUDIT_CONTROL,CAP_SETFCAP,CAP_MAC_OVERRIDE,CAP_MAC_ADMIN,CAP_SYSLOG,CAP_WAKE_ALARM,CAP_BLOCK_SUSPEND,CAP_AUDIT_READ',
+    // Split by `,`, remove CAP_ prefix, turn into unique set.
+    local allCaps = std.set(std.map(function(el) std.substr(el, 4, std.length(el)-4), std.split(allCapsStr, ','))),
+
+
     Cluster: {
         local cluster = self,
 
@@ -98,6 +104,7 @@ local kube = import "kube.libsonnet";
                     ],
                 },
                 readOnlyRootFilesystem: false,
+
             },
         },
         secureRole: kube.ClusterRole(policies.policyNameAllowSecure) {
@@ -111,13 +118,38 @@ local kube = import "kube.libsonnet";
             ],
         },
 
-        // MostlySecure: like secure, but allows for setuid inside containers.
+        // MostlySecure: like secure, but allows for setuid inside containers
+        // and enough filesystem access to run apt.
         mostlySecure: cluster.secure {
             metadata+: {
                 name: "mostlysecure",
             },
             spec+: {
-                allowPrivilegeEscalation: true,
+                requiredDropCapabilities: std.setDiff(allCaps, std.set([
+                    // Drop everything apart from (std.set makes the order here irrelevant):
+                    "CHOWN",
+                    "DAC_OVERRIDE",
+                    "FOWNER",
+                    "LEASE",
+                    "SETGID",
+                    "SETUID",
+                ])),
+                supplementalGroups: {
+                    // Allow running as root gid - we allow running as root
+                    // uid anyway, as we trust our container runtime.
+                    rule: 'MustRunAs',
+                    ranges: [
+                        { min: 0, max: 65535, },
+                    ],
+                },
+                fsGroup: {
+                    // Allow setting the fsGroup to 0, as all filesystem mounts
+                    // are trusted anyway.
+                    rule: 'MustRunAs',
+                    ranges: [
+                        { min: 0, max: 65535, },
+                    ],
+                },
             },
         },
         mostlySecureRole: kube.ClusterRole(policies.policyNameAllowMostlySecure) {