forked from hswaw/hscloud
This now allows running apt and should allow running most upstream Docker images. In return, we prohibit some mildly sketchy stuff. But this is safe enough for project namespaces with limited administrative access. We should still get gVisor sooner rather than later... Change-Id: Ida5ccfae440bacb6f3fd55dcc34ca0addfddd5ae
200 lines
7.3 KiB
Jsonnet
200 lines
7.3 KiB
Jsonnet
local kube = import "kube.libsonnet";
|
|
|
|
{
|
|
local policies = self,

// Names of the ClusterRoles that grant `use` on the matching
// PodSecurityPolicy (see Cluster.*Role).
policyNameAllowInsecure: "policy:allow-insecure",
policyNameAllowSecure: "policy:allow-secure",
policyNameAllowMostlySecure: "policy:allow-mostlysecure",

// Comma-separated Linux capability names, generated with:
//   egrep 'define CAP_[A-Z_]+.+[0-9]+$' include/linux/capability.h | cut -d' ' -f 2 | tr '\n' ','
// NOTE(review): this is a hard-coded snapshot; capabilities added by newer
// kernels are not picked up automatically - regenerate when needed.
local allCapsStr = 'CAP_CHOWN,CAP_DAC_OVERRIDE,CAP_DAC_READ_SEARCH,CAP_FOWNER,CAP_FSETID,CAP_KILL,CAP_SETGID,CAP_SETUID,CAP_SETPCAP,CAP_LINUX_IMMUTABLE,CAP_NET_BIND_SERVICE,CAP_NET_BROADCAST,CAP_NET_ADMIN,CAP_NET_RAW,CAP_IPC_LOCK,CAP_IPC_OWNER,CAP_SYS_MODULE,CAP_SYS_RAWIO,CAP_SYS_CHROOT,CAP_SYS_PTRACE,CAP_SYS_PACCT,CAP_SYS_ADMIN,CAP_SYS_BOOT,CAP_SYS_NICE,CAP_SYS_RESOURCE,CAP_SYS_TIME,CAP_SYS_TTY_CONFIG,CAP_MKNOD,CAP_LEASE,CAP_AUDIT_WRITE,CAP_AUDIT_CONTROL,CAP_SETFCAP,CAP_MAC_OVERRIDE,CAP_MAC_ADMIN,CAP_SYSLOG,CAP_WAKE_ALARM,CAP_BLOCK_SUSPEND,CAP_AUDIT_READ',

// Every capability name with its leading 'CAP_' removed, as a sorted,
// de-duplicated set (the form std.setDiff and friends expect).
local capPrefixLen = std.length('CAP_'),
local allCaps = std.set([
    std.substr(cap, capPrefixLen, std.length(cap) - capPrefixLen)
    for cap in std.split(allCapsStr, ',')
]),
|
|
|
|
|
|
// Cluster: the PodSecurityPolicy objects themselves, plus one ClusterRole per
// policy that grants the `use` verb on it. Binding subjects to those roles is
// done separately (see the AllowNamespace* helpers).
Cluster: {
    local cluster = self,

    // Builds the ClusterRole named `roleName` whose only rule allows `use`
    // of the PodSecurityPolicy named `pspName`. Object-local, so it does not
    // show up as a field of Cluster.
    local usePolicyRole(roleName, pspName) = kube.ClusterRole(roleName) {
        rules: [
            {
                apiGroups: ['policy'],
                resources: ['podsecuritypolicies'],
                verbs: ['use'],
                resourceNames: [pspName],
            },
        ],
    },

    // Insecure: allowing creation of these pods allows you to pwn the entire cluster.
    insecure: kube._Object("policy/v1beta1", "PodSecurityPolicy", "insecure") {
        spec: {
            privileged: true,
            allowPrivilegeEscalation: true,
            allowedCapabilities: ['*'],
            volumes: ['*'],
            hostNetwork: true,
            hostPorts: [
                { max: 40000, min: 1 },
            ],
            hostIPC: true,
            hostPID: true,
            runAsUser: {
                rule: 'RunAsAny',
            },
            seLinux: {
                rule: 'RunAsAny',
            },
            supplementalGroups: {
                rule: 'RunAsAny',
            },
            fsGroup: {
                rule: 'RunAsAny',
            },
        },
    },
    insecureRole: usePolicyRole(policies.policyNameAllowInsecure, 'insecure'),

    // Secure: very limited subset of security policy, everyone is allowed
    // to spawn containers of this kind.
    secure: kube._Object("policy/v1beta1", "PodSecurityPolicy", "secure") {
        spec: {
            privileged: false,
            // Required to prevent escalations to root.
            allowPrivilegeEscalation: false,
            // Force every capability to be dropped, so that even a root user
            // inside the container (permitted by runAsUser below) holds none.
            requiredDropCapabilities: ["ALL"],
            // Allow core volume types.
            volumes: [
                'configMap',
                'emptyDir',
                'projected',
                'secret',
                'downwardAPI',
                'persistentVolumeClaim',
            ],
            hostNetwork: false,
            hostIPC: false,
            hostPID: false,
            runAsUser: {
                // Allow to run as root - docker, we trust you here.
                rule: 'RunAsAny',
            },
            seLinux: {
                rule: 'RunAsAny',
            },
            supplementalGroups: {
                rule: 'MustRunAs',
                ranges: [
                    {
                        // Forbid adding the root group.
                        min: 1,
                        max: 65535,
                    },
                ],
            },
            fsGroup: {
                rule: 'MustRunAs',
                ranges: [
                    {
                        // Forbid adding the root group.
                        min: 1,
                        max: 65535,
                    },
                ],
            },
            readOnlyRootFilesystem: false,
        },
    },
    secureRole: usePolicyRole(policies.policyNameAllowSecure, 'secure'),

    // MostlySecure: like secure, but allows for setuid inside containers
    // and enough filesystem access to run apt.
    mostlySecure: cluster.secure {
        metadata+: {
            name: "mostlysecure",
        },
        spec+: {
            // Drop everything apart from the capabilities listed here.
            // std.setDiff requires both operands to be sets, so the keep-list
            // is normalised with std.set instead of relying on it being
            // written in sorted order.
            requiredDropCapabilities: std.setDiff(allCaps, std.set([
                "CHOWN",
                "DAC_OVERRIDE",
                "FOWNER",
                "LEASE",
                "SETGID",
                "SETUID",
            ])),
            supplementalGroups: {
                // Allow running as root gid - we allow running as root
                // uid anyway, as we trust our container runtime.
                rule: 'MustRunAs',
                ranges: [
                    { min: 0, max: 65535 },
                ],
            },
            fsGroup: {
                // Allow setting the fsGroup to 0, as all filesystem mounts
                // are trusted anyway.
                rule: 'MustRunAs',
                ranges: [
                    { min: 0, max: 65535 },
                ],
            },
        },
    },
    mostlySecureRole: usePolicyRole(policies.policyNameAllowMostlySecure, 'mostlysecure'),
},
|
|
|
|
// AllowNamespaceInsecure(namespace) yields an object whose `rb` field is a
// RoleBinding, created in `namespace`, that binds the group of all service
// accounts to the ClusterRole permitting use of the `insecure` policy.
AllowNamespaceInsecure(namespace): {
    local bindingName = "policy:allow-insecure-in-" + namespace,
    local allServiceAccounts = {
        apiGroup: "rbac.authorization.k8s.io",
        kind: "Group",
        name: "system:serviceaccounts",
    },

    rb: kube.RoleBinding(bindingName) {
        metadata+: { namespace: namespace },
        roleRef_: policies.Cluster.insecureRole,
        subjects: [allServiceAccounts],
    },
},
|
|
|
|
// AllowNamespaceMostlySecure(namespace) yields an object whose `rb` field is
// a RoleBinding, created in `namespace`, that binds the group of all service
// accounts to the ClusterRole permitting use of the `mostlysecure` policy.
AllowNamespaceMostlySecure(namespace): {
    local bindingName = "policy:allow-mostlysecure-in-" + namespace,
    local allServiceAccounts = {
        apiGroup: "rbac.authorization.k8s.io",
        kind: "Group",
        name: "system:serviceaccounts",
    },

    rb: kube.RoleBinding(bindingName) {
        metadata+: { namespace: namespace },
        roleRef_: policies.Cluster.mostlySecureRole,
        subjects: [allServiceAccounts],
    },
},
|
|
}
|