linux/arch/x86/ia32/sys_ia32.c
Linus Torvalds 644473e9c6 Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace
Pull user namespace enhancements from Eric Biederman:
 "This is a course correction for the user namespace, so that we can
  reach an inexpensive, maintainable, and reasonably complete
  implementation.

  Highlights:
   - Config guards make it impossible to enable the user namespace and
     code that has not been converted to be user namespace safe.

   - Use of the new kuid_t type ensures the if you somehow get past the
     config guards the kernel will encounter type errors if you enable
     user namespaces and attempt to compile in code whose permission
     checks have not been updated to be user namespace safe.

   - All uids from child user namespaces are mapped into the initial
     user namespace before they are processed.  Removing the need to add
     an additional check to see if the user namespace of the compared
     uids remains the same.

   - With the user namespaces compiled out the performance is as good or
     better than it is today.

   - For most operations absolutely nothing changes performance or
     operationally with the user namespace enabled.

   - The worst case performance I could come up with was timing 1
     billion cache cold stat operations with the user namespace code
     enabled.  This went from 156s to 164s on my laptop (or 156ns to
     164ns per stat operation).

   - (uid_t)-1 and (gid_t)-1 are reserved as an internal error value.
     Most uid/gid setting system calls treat these value specially
     anyway so attempting to use -1 as a uid would likely cause
     entertaining failures in userspace.

   - If setuid is called with a uid that can not be mapped setuid fails.
     I have looked at sendmail, login, ssh and every other program I
     could think of that would call setuid and they all check for and
     handle the case where setuid fails.

   - If stat or a similar system call is called from a context in which
     we can not map a uid we lie and return overflowuid.  The LFS
     experience suggests not lying and returning an error code might be
     better, but the historical precedent with uids is different and I
     can not think of anything that would break by lying about a uid we
     can't map.

   - Capabilities are localized to the current user namespace making it
     safe to give the initial user in a user namespace all capabilities.

  My git tree covers all of the modifications needed to convert the core
  kernel and enough changes to make a system bootable to runlevel 1."

Fix up trivial conflicts due to nearby independent changes in fs/stat.c

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace: (46 commits)
  userns:  Silence silly gcc warning.
  cred: use correct cred accessor with regards to rcu read lock
  userns: Convert the move_pages, and migrate_pages permission checks to use uid_eq
  userns: Convert cgroup permission checks to use uid_eq
  userns: Convert tmpfs to use kuid and kgid where appropriate
  userns: Convert sysfs to use kgid/kuid where appropriate
  userns: Convert sysctl permission checks to use kuid and kgids.
  userns: Convert proc to use kuid/kgid where appropriate
  userns: Convert ext4 to user kuid/kgid where appropriate
  userns: Convert ext3 to use kuid/kgid where appropriate
  userns: Convert ext2 to use kuid/kgid where appropriate.
  userns: Convert devpts to use kuid/kgid where appropriate
  userns: Convert binary formats to use kuid/kgid where appropriate
  userns: Add negative depends on entries to avoid building code that is userns unsafe
  userns: signal remove unnecessary map_cred_ns
  userns: Teach inode_capable to understand inodes whose uids map to other namespaces.
  userns: Fail exec for suid and sgid binaries with ids outside our user namespace.
  userns: Convert stat to return values mapped from kuids and kgids
  userns: Convert user specfied uids and gids in chown into kuids and kgid
  userns: Use uid_eq gid_eq helpers when comparing kuids and kgids in the vfs
  ...
2012-05-23 17:42:39 -07:00

493 lines
13 KiB
C

/*
* sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Based on
* sys_sparc32
*
* Copyright (C) 2000 VA Linux Co
* Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
* Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
* Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 2000 Hewlett-Packard Co.
* Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
* Copyright (C) 2000,2001,2002 Andi Kleen, SuSE Labs (x86-64 port)
*
* These routines maintain argument size conversion between 32bit and 64bit
* environment. In 2.5 most of this should be moved to a generic directory.
*
* This file assumes that there is a hole at the end of user address space.
*
* Some of the functions are LE specific currently. These are
* hopefully all marked. This should be fixed.
*/
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/signal.h>
#include <linux/syscalls.h>
#include <linux/times.h>
#include <linux/utsname.h>
#include <linux/mm.h>
#include <linux/uio.h>
#include <linux/poll.h>
#include <linux/personality.h>
#include <linux/stat.h>
#include <linux/rwsem.h>
#include <linux/compat.h>
#include <linux/vfs.h>
#include <linux/ptrace.h>
#include <linux/highuid.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <asm/mman.h>
#include <asm/types.h>
#include <asm/uaccess.h>
#include <linux/atomic.h>
#include <asm/vgtod.h>
#include <asm/sys_ia32.h>
#define AA(__x) ((unsigned long)(__x))
asmlinkage long sys32_truncate64(const char __user *filename,
unsigned long offset_low,
unsigned long offset_high)
{
return sys_truncate(filename, ((loff_t) offset_high << 32) | offset_low);
}
asmlinkage long sys32_ftruncate64(unsigned int fd, unsigned long offset_low,
unsigned long offset_high)
{
return sys_ftruncate(fd, ((loff_t) offset_high << 32) | offset_low);
}
/*
* Another set for IA32/LFS -- x86_64 struct stat is different due to
* support for 64bit inode numbers.
*/
static int cp_stat64(struct stat64 __user *ubuf, struct kstat *stat)
{
typeof(ubuf->st_uid) uid = 0;
typeof(ubuf->st_gid) gid = 0;
SET_UID(uid, from_kuid_munged(current_user_ns(), stat->uid));
SET_GID(gid, from_kgid_munged(current_user_ns(), stat->gid));
if (!access_ok(VERIFY_WRITE, ubuf, sizeof(struct stat64)) ||
__put_user(huge_encode_dev(stat->dev), &ubuf->st_dev) ||
__put_user(stat->ino, &ubuf->__st_ino) ||
__put_user(stat->ino, &ubuf->st_ino) ||
__put_user(stat->mode, &ubuf->st_mode) ||
__put_user(stat->nlink, &ubuf->st_nlink) ||
__put_user(uid, &ubuf->st_uid) ||
__put_user(gid, &ubuf->st_gid) ||
__put_user(huge_encode_dev(stat->rdev), &ubuf->st_rdev) ||
__put_user(stat->size, &ubuf->st_size) ||
__put_user(stat->atime.tv_sec, &ubuf->st_atime) ||
__put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec) ||
__put_user(stat->mtime.tv_sec, &ubuf->st_mtime) ||
__put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec) ||
__put_user(stat->ctime.tv_sec, &ubuf->st_ctime) ||
__put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec) ||
__put_user(stat->blksize, &ubuf->st_blksize) ||
__put_user(stat->blocks, &ubuf->st_blocks))
return -EFAULT;
return 0;
}
asmlinkage long sys32_stat64(const char __user *filename,
struct stat64 __user *statbuf)
{
struct kstat stat;
int ret = vfs_stat(filename, &stat);
if (!ret)
ret = cp_stat64(statbuf, &stat);
return ret;
}
asmlinkage long sys32_lstat64(const char __user *filename,
struct stat64 __user *statbuf)
{
struct kstat stat;
int ret = vfs_lstat(filename, &stat);
if (!ret)
ret = cp_stat64(statbuf, &stat);
return ret;
}
asmlinkage long sys32_fstat64(unsigned int fd, struct stat64 __user *statbuf)
{
struct kstat stat;
int ret = vfs_fstat(fd, &stat);
if (!ret)
ret = cp_stat64(statbuf, &stat);
return ret;
}
asmlinkage long sys32_fstatat(unsigned int dfd, const char __user *filename,
struct stat64 __user *statbuf, int flag)
{
struct kstat stat;
int error;
error = vfs_fstatat(dfd, filename, &stat, flag);
if (error)
return error;
return cp_stat64(statbuf, &stat);
}
/*
* Linux/i386 didn't use to be able to handle more than
* 4 system call parameters, so these system calls used a memory
* block for parameter passing..
*/
struct mmap_arg_struct32 {
unsigned int addr;
unsigned int len;
unsigned int prot;
unsigned int flags;
unsigned int fd;
unsigned int offset;
};
asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *arg)
{
struct mmap_arg_struct32 a;
if (copy_from_user(&a, arg, sizeof(a)))
return -EFAULT;
if (a.offset & ~PAGE_MASK)
return -EINVAL;
return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
a.offset>>PAGE_SHIFT);
}
asmlinkage long sys32_mprotect(unsigned long start, size_t len,
unsigned long prot)
{
return sys_mprotect(start, len, prot);
}
asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act,
struct sigaction32 __user *oact,
unsigned int sigsetsize)
{
struct k_sigaction new_ka, old_ka;
int ret;
compat_sigset_t set32;
/* XXX: Don't preclude handling different sized sigset_t's. */
if (sigsetsize != sizeof(compat_sigset_t))
return -EINVAL;
if (act) {
compat_uptr_t handler, restorer;
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
__get_user(restorer, &act->sa_restorer) ||
__copy_from_user(&set32, &act->sa_mask,
sizeof(compat_sigset_t)))
return -EFAULT;
new_ka.sa.sa_handler = compat_ptr(handler);
new_ka.sa.sa_restorer = compat_ptr(restorer);
/*
* FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
* than _NSIG_WORDS << 1
*/
switch (_NSIG_WORDS) {
case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6]
| (((long)set32.sig[7]) << 32);
case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4]
| (((long)set32.sig[5]) << 32);
case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2]
| (((long)set32.sig[3]) << 32);
case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0]
| (((long)set32.sig[1]) << 32);
}
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
/*
* FIXME: here we rely on _COMPAT_NSIG_WORS to be >=
* than _NSIG_WORDS << 1
*/
switch (_NSIG_WORDS) {
case 4:
set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32);
set32.sig[6] = old_ka.sa.sa_mask.sig[3];
case 3:
set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32);
set32.sig[4] = old_ka.sa.sa_mask.sig[2];
case 2:
set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32);
set32.sig[2] = old_ka.sa.sa_mask.sig[1];
case 1:
set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32);
set32.sig[0] = old_ka.sa.sa_mask.sig[0];
}
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(ptr_to_compat(old_ka.sa.sa_handler),
&oact->sa_handler) ||
__put_user(ptr_to_compat(old_ka.sa.sa_restorer),
&oact->sa_restorer) ||
__put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
__copy_to_user(&oact->sa_mask, &set32,
sizeof(compat_sigset_t)))
return -EFAULT;
}
return ret;
}
asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act,
struct old_sigaction32 __user *oact)
{
struct k_sigaction new_ka, old_ka;
int ret;
if (act) {
compat_old_sigset_t mask;
compat_uptr_t handler, restorer;
if (!access_ok(VERIFY_READ, act, sizeof(*act)) ||
__get_user(handler, &act->sa_handler) ||
__get_user(new_ka.sa.sa_flags, &act->sa_flags) ||
__get_user(restorer, &act->sa_restorer) ||
__get_user(mask, &act->sa_mask))
return -EFAULT;
new_ka.sa.sa_handler = compat_ptr(handler);
new_ka.sa.sa_restorer = compat_ptr(restorer);
siginitset(&new_ka.sa.sa_mask, mask);
}
ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
if (!ret && oact) {
if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) ||
__put_user(ptr_to_compat(old_ka.sa.sa_handler),
&oact->sa_handler) ||
__put_user(ptr_to_compat(old_ka.sa.sa_restorer),
&oact->sa_restorer) ||
__put_user(old_ka.sa.sa_flags, &oact->sa_flags) ||
__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask))
return -EFAULT;
}
return ret;
}
asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int *stat_addr,
int options)
{
return compat_sys_wait4(pid, stat_addr, options, NULL);
}
/* 32-bit timeval and related flotsam. */
asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid,
struct compat_timespec __user *interval)
{
struct timespec t;
int ret;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t);
set_fs(old_fs);
if (put_compat_timespec(&t, interval))
return -EFAULT;
return ret;
}
asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set,
compat_size_t sigsetsize)
{
sigset_t s;
compat_sigset_t s32;
int ret;
mm_segment_t old_fs = get_fs();
set_fs(KERNEL_DS);
ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize);
set_fs(old_fs);
if (!ret) {
switch (_NSIG_WORDS) {
case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3];
case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2];
case 2: s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1];
case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0];
}
if (copy_to_user(set, &s32, sizeof(compat_sigset_t)))
return -EFAULT;
}
return ret;
}
asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig,
compat_siginfo_t __user *uinfo)
{
siginfo_t info;
int ret;
mm_segment_t old_fs = get_fs();
if (copy_siginfo_from_user32(&info, uinfo))
return -EFAULT;
set_fs(KERNEL_DS);
ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info);
set_fs(old_fs);
return ret;
}
/* warning: next two assume little endian */
asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count,
u32 poslo, u32 poshi)
{
return sys_pread64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
}
asmlinkage long sys32_pwrite(unsigned int fd, const char __user *ubuf,
u32 count, u32 poslo, u32 poshi)
{
return sys_pwrite64(fd, ubuf, count,
((loff_t)AA(poshi) << 32) | AA(poslo));
}
asmlinkage long sys32_sendfile(int out_fd, int in_fd,
compat_off_t __user *offset, s32 count)
{
mm_segment_t old_fs = get_fs();
int ret;
off_t of;
if (offset && get_user(of, offset))
return -EFAULT;
set_fs(KERNEL_DS);
ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *)&of : NULL,
count);
set_fs(old_fs);
if (offset && put_user(of, offset))
return -EFAULT;
return ret;
}
asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
compat_uptr_t __user *envp, struct pt_regs *regs)
{
long error;
char *filename;
filename = getname(name);
error = PTR_ERR(filename);
if (IS_ERR(filename))
return error;
error = compat_do_execve(filename, argv, envp, regs);
putname(filename);
return error;
}
asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
struct pt_regs *regs)
{
void __user *parent_tid = (void __user *)regs->dx;
void __user *child_tid = (void __user *)regs->di;
if (!newsp)
newsp = regs->sp;
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}
/*
* Some system calls that need sign extended arguments. This could be
* done by a generic wrapper.
*/
long sys32_lseek(unsigned int fd, int offset, unsigned int whence)
{
return sys_lseek(fd, offset, whence);
}
long sys32_kill(int pid, int sig)
{
return sys_kill(pid, sig);
}
long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
__u32 len_low, __u32 len_high, int advice)
{
return sys_fadvise64_64(fd,
(((u64)offset_high)<<32) | offset_low,
(((u64)len_high)<<32) | len_low,
advice);
}
long sys32_vm86_warning(void)
{
struct task_struct *me = current;
static char lastcomm[sizeof(me->comm)];
if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
compat_printk(KERN_INFO
"%s: vm86 mode not supported on 64 bit kernel\n",
me->comm);
strncpy(lastcomm, me->comm, sizeof(lastcomm));
}
return -ENOSYS;
}
long sys32_lookup_dcookie(u32 addr_low, u32 addr_high,
char __user *buf, size_t len)
{
return sys_lookup_dcookie(((u64)addr_high << 32) | addr_low, buf, len);
}
asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
size_t count)
{
return sys_readahead(fd, ((u64)off_hi << 32) | off_lo, count);
}
asmlinkage long sys32_sync_file_range(int fd, unsigned off_low, unsigned off_hi,
unsigned n_low, unsigned n_hi, int flags)
{
return sys_sync_file_range(fd,
((u64)off_hi << 32) | off_low,
((u64)n_hi << 32) | n_low, flags);
}
asmlinkage long sys32_fadvise64(int fd, unsigned offset_lo, unsigned offset_hi,
size_t len, int advice)
{
return sys_fadvise64_64(fd, ((u64)offset_hi << 32) | offset_lo,
len, advice);
}
asmlinkage long sys32_fallocate(int fd, int mode, unsigned offset_lo,
unsigned offset_hi, unsigned len_lo,
unsigned len_hi)
{
return sys_fallocate(fd, mode, ((u64)offset_hi << 32) | offset_lo,
((u64)len_hi << 32) | len_lo);
}
asmlinkage long sys32_fanotify_mark(int fanotify_fd, unsigned int flags,
u32 mask_lo, u32 mask_hi,
int fd, const char __user *pathname)
{
return sys_fanotify_mark(fanotify_fd, flags,
((u64)mask_hi << 32) | mask_lo,
fd, pathname);
}