linux/fs/ramfs/file-nommu.c

268 lines
6.6 KiB
C
Raw Normal View History

/* file-nommu.c: no-MMU version of ramfs
*
* Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/ramfs.h>
#include <linux/pagevec.h>
#include <linux/mman.h>
#include <linux/sched.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <asm/uaccess.h>
#include "internal.h"
static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
const struct address_space_operations ramfs_aops = {
.readpage = simple_readpage,
.write_begin = simple_write_begin,
.write_end = simple_write_end,
.set_page_dirty = __set_page_dirty_no_writeback,
};
const struct file_operations ramfs_file_operations = {
.mmap = ramfs_nommu_mmap,
.get_unmapped_area = ramfs_nommu_get_unmapped_area,
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.fsync = noop_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
.llseek = generic_file_llseek,
};
const struct inode_operations ramfs_file_inode_operations = {
.setattr = ramfs_nommu_setattr,
.getattr = simple_getattr,
};
/*****************************************************************************/
/*
* add a contiguous set of pages into a ramfs inode when it's truncated from
* size 0 on the assumption that it's going to be used for an mmap of shared
* memory
*/
int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
{
unsigned long npages, xpages, loop;
struct page *pages;
unsigned order;
void *data;
int ret;
/* make various checks */
order = get_order(newsize);
if (unlikely(order >= MAX_ORDER))
return -EFBIG;
ret = inode_newsize_ok(inode, newsize);
if (ret)
return ret;
i_size_write(inode, newsize);
/* allocate enough contiguous pages to be able to satisfy the
* request */
pages = alloc_pages(mapping_gfp_mask(inode->i_mapping), order);
if (!pages)
return -ENOMEM;
/* split the high-order page into an array of single pages */
xpages = 1UL << order;
npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
split_page(pages, order);
/* trim off any pages we don't actually require */
for (loop = npages; loop < xpages; loop++)
__free_page(pages + loop);
/* clear the memory we allocated */
newsize = PAGE_SIZE * npages;
data = page_address(pages);
memset(data, 0, newsize);
/* attach all the pages to the inode's address space */
for (loop = 0; loop < npages; loop++) {
struct page *page = pages + loop;
ret = add_to_page_cache_lru(page, inode->i_mapping, loop,
GFP_KERNEL);
if (ret < 0)
goto add_error;
/* prevent the page from being discarded on memory pressure */
SetPageDirty(page);
SetPageUptodate(page);
unlock_page(page);
put_page(page);
}
return 0;
add_error:
while (loop < npages)
__free_page(pages + loop++);
return ret;
}
/*****************************************************************************/
/*
*
*/
static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
{
int ret;
/* assume a truncate from zero size is going to be for the purposes of
* shared mmap */
if (size == 0) {
if (unlikely(newsize >> 32))
return -EFBIG;
return ramfs_nommu_expand_for_mapping(inode, newsize);
}
/* check that a decrease in size doesn't cut off any shared mappings */
if (newsize < size) {
nommu: fix shared mmap after truncate shrinkage problems Fix a problem in NOMMU mmap with ramfs whereby a shared mmap can happen over the end of a truncation. The problem is that ramfs_nommu_check_mappings() checks that the reduced file size against the VMA tree, but not the vm_region tree. The following sequence of events can cause the problem: fd = open("/tmp/x", O_RDWR|O_TRUNC|O_CREAT, 0600); ftruncate(fd, 32 * 1024); a = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); b = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); munmap(a, 32 * 1024); ftruncate(fd, 16 * 1024); c = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); Mapping 'a' creates a vm_region covering 32KB of the file. Mapping 'b' sees that the vm_region from 'a' is covering the region it wants and so shares it, pinning it in memory. Mapping 'a' then goes away and the file is truncated to the end of VMA 'b'. However, the region allocated by 'a' is still in effect, and has _not_ been reduced. Mapping 'c' is then created, and because there's a vm_region covering the desired region, get_unmapped_area() is _not_ called to repeat the check, and the mapping is granted, even though the pages from the latter half of the mapping have been discarded. However: d = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); Mapping 'd' should work, and should end up sharing the region allocated by 'a'. To deal with this, we shrink the vm_region struct during the truncation, lest do_mmap_pgoff() take it as licence to share the full region automatically without calling the get_unmapped_area() file op again. Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Al Viro <viro@zeniv.linux.org.uk> Cc: Greg Ungerer <gerg@snapgear.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2010-01-16 01:01:39 +00:00
ret = nommu_shrink_inode_mappings(inode, size, newsize);
if (ret < 0)
return ret;
}
truncate_setsize(inode, newsize);
return 0;
}
/*****************************************************************************/
/*
* handle a change of attributes
* - we're specifically interested in a change of size
*/
static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
{
struct inode *inode = dentry->d_inode;
unsigned int old_ia_valid = ia->ia_valid;
int ret = 0;
/* POSIX UID/GID verification for setting inode attributes */
ret = inode_change_ok(inode, ia);
if (ret)
return ret;
/* pick out size-changing events */
if (ia->ia_valid & ATTR_SIZE) {
loff_t size = inode->i_size;
if (ia->ia_size != size) {
ret = ramfs_nommu_resize(inode, ia->ia_size, size);
if (ret < 0 || ia->ia_valid == ATTR_SIZE)
goto out;
} else {
/* we skipped the truncate but must still update
* timestamps
*/
ia->ia_valid |= ATTR_MTIME|ATTR_CTIME;
}
}
setattr_copy(inode, ia);
out:
ia->ia_valid = old_ia_valid;
return ret;
}
/*****************************************************************************/
/*
* try to determine where a shared mapping can be made
* - we require that:
* - the pages to be mapped must exist
* - the pages be physically contiguous in sequence
*/
unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
unsigned long maxpages, lpages, nr, loop, ret;
struct inode *inode = file->f_path.dentry->d_inode;
struct page **pages = NULL, **ptr, *page;
loff_t isize;
if (!(flags & MAP_SHARED))
return addr;
/* the mapping mustn't extend beyond the EOF */
lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
isize = i_size_read(inode);
ret = -EINVAL;
maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (pgoff >= maxpages)
goto out;
if (maxpages - pgoff < lpages)
goto out;
/* gang-find the pages */
ret = -ENOMEM;
pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
if (!pages)
goto out_free;
nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages);
if (nr != lpages)
goto out_free_pages; /* leave if some pages were missing */
/* check the pages for physical adjacency */
ptr = pages;
page = *ptr++;
page++;
for (loop = lpages; loop > 1; loop--)
if (*ptr++ != page++)
goto out_free_pages;
/* okay - all conditions fulfilled */
ret = (unsigned long) page_address(pages[0]);
out_free_pages:
ptr = pages;
for (loop = nr; loop > 0; loop--)
put_page(*ptr++);
out_free:
kfree(pages);
out:
return ret;
}
/*****************************************************************************/
/*
* set up a mapping for shared memory segments
*/
int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
{
if (!(vma->vm_flags & VM_SHARED))
return -ENOSYS;
file_accessed(file);
vma->vm_ops = &generic_file_vm_ops;
return 0;
}