linux/drivers/pci/proc.c
Kenji Kaneshige 8cc2bfd87f PCI: kernel oops on access to pci proc file while hot-removal
I encountered the problem that /proc/bus/pci/XX/YY is not removed even
after the corresponding device is hot-removed, if the file is still
being opened. In addtion, accessing this file in this situation causes
kernel panic (see below).

Becasue the pci_proc_detach_device() doesn't call remove_proc_entry()
if struct proc_dir_entry->count > 1, access to /proc/bus/pci/XX/YY
would refer to struct pci_dev that was already freed.

Though I don't know why the check for proc_dir_entry->count was added,
I don't think it is needed. Removing this check fixes the problem.

Steps to reproduce
------------------
# cd /sys/bus/pci/slots/2/
# PROC_BUS_PCI_FILE=/proc/bus/pci/`awk -F: '{print $2"/"$3}' < address`.0
# sleep 10000 < $PROC_BUS_PCI_FILE &
# echo 0 > power
# while true; do cat $PROC_BUS_PCI_FILE > /dev/null; done

Oops Messages
-------------
BUG: unable to handle kernel NULL pointer dereference at 00000042
IP: [<c05c82d5>] pci_user_read_config_dword+0x65/0xa0
*pdpt = 000000002185e001 *pde = 0000000476a79067
Oops: 0000 [#1] SMP
last sysfs file: /sys/devices/pci0000:00/0000:00:1c.0/0000:10:00.0/local_cpus
Modules linked in: autofs4 sunrpc cpufreq_ondemand acpi_cpufreq ipv6 dm_mirror dm_region_hash dm_log dm_mod e1000e i2c_i801 i2c_core iTCO_wdt igb sg pcspkr dca iTCO_vendor_support ext4 mbcache jbd2 sd_mod crc_t10dif lpfc mptsas scsi_transport_fc mptscsih mptbase scsi_tgt scsi_transport_sas [last unloaded: microcode]

Pid: 2997, comm: cat Not tainted 2.6.34-kk #32 SB/PRIMEQUEST 1800E
EIP: 0060:[<c05c82d5>] EFLAGS: 00010046 CPU: 19
EIP is at pci_user_read_config_dword+0x65/0xa0
EAX: 00000002 EBX: e44f1800 ECX: e144df14 EDX: 155668c7
ESI: 00000087 EDI: 00000000 EBP: e144df40 ESP: e144df0c
 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
Process cat (pid: 2997, ti=e144c000 task=e26f2570 task.ti=e144c000)
Stack:
 c09ceac0 c0570f72 ffffffff 08c57000 00000000 00001000 e44f1800 c05d2404
<0> e144df40 00001000 00000000 00001000 08c57000 3093ae50 e420cb40 e358d5c0
<0> c05d2300 fffffffb c054984f e144df9c 00008000 08c57000 e358d5c0 00008000
Call Trace:
 [<c0570f72>] ? security_capable+0x22/0x30
 [<c05d2404>] ? proc_bus_pci_read+0x104/0x220
 [<c05d2300>] ? proc_bus_pci_read+0x0/0x220
 [<c054984f>] ? proc_reg_read+0x5f/0x90
 [<c05497f0>] ? proc_reg_read+0x0/0x90
 [<c050694d>] ? vfs_read+0x9d/0x190
 [<c04958f4>] ? audit_syscall_entry+0x204/0x230
 [<c0506a81>] ? sys_read+0x41/0x70
 [<c0402f1f>] ? sysenter_do_call+0x12/0x28
Code: b4 26 00 00 00 00 b8 20 88 b1 c0 c7 44 24 08 ff ff ff ff e8 3e 52 22 00 f6 83 24 04 00 00 20 75 34 8b 43 08 8d 4c 24 08 8b 53 1c <8b> 70 40 89 4c 24 04 89 f9 c7 04 24 04 00 00 00 ff 16 89 c6 f0
EIP: [<c05c82d5>] pci_user_read_config_dword+0x65/0xa0 SS:ESP 0068:e144df0c
CR2: 0000000000000042

Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Signed-off-by: Kenji Kaneshige <kaneshige.kenji@jp.fujitsu.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
2010-07-30 09:29:33 -07:00

493 lines
10 KiB
C

/*
* Procfs interface for the PCI bus.
*
* Copyright (c) 1997--1999 Martin Mares <mj@ucw.cz>
*/
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/smp_lock.h>
#include <linux/capability.h>
#include <asm/uaccess.h>
#include <asm/byteorder.h>
#include "pci.h"
static int proc_initialized; /* = 0 */
static loff_t
proc_bus_pci_lseek(struct file *file, loff_t off, int whence)
{
loff_t new = -1;
struct inode *inode = file->f_path.dentry->d_inode;
mutex_lock(&inode->i_mutex);
switch (whence) {
case 0:
new = off;
break;
case 1:
new = file->f_pos + off;
break;
case 2:
new = inode->i_size + off;
break;
}
if (new < 0 || new > inode->i_size)
new = -EINVAL;
else
file->f_pos = new;
mutex_unlock(&inode->i_mutex);
return new;
}
static ssize_t
proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
{
const struct inode *ino = file->f_path.dentry->d_inode;
const struct proc_dir_entry *dp = PDE(ino);
struct pci_dev *dev = dp->data;
unsigned int pos = *ppos;
unsigned int cnt, size;
/*
* Normal users can read only the standardized portion of the
* configuration space as several chips lock up when trying to read
* undefined locations (think of Intel PIIX4 as a typical example).
*/
if (capable(CAP_SYS_ADMIN))
size = dp->size;
else if (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
size = 128;
else
size = 64;
if (pos >= size)
return 0;
if (nbytes >= size)
nbytes = size;
if (pos + nbytes > size)
nbytes = size - pos;
cnt = nbytes;
if (!access_ok(VERIFY_WRITE, buf, cnt))
return -EINVAL;
if ((pos & 1) && cnt) {
unsigned char val;
pci_user_read_config_byte(dev, pos, &val);
__put_user(val, buf);
buf++;
pos++;
cnt--;
}
if ((pos & 3) && cnt > 2) {
unsigned short val;
pci_user_read_config_word(dev, pos, &val);
__put_user(cpu_to_le16(val), (__le16 __user *) buf);
buf += 2;
pos += 2;
cnt -= 2;
}
while (cnt >= 4) {
unsigned int val;
pci_user_read_config_dword(dev, pos, &val);
__put_user(cpu_to_le32(val), (__le32 __user *) buf);
buf += 4;
pos += 4;
cnt -= 4;
}
if (cnt >= 2) {
unsigned short val;
pci_user_read_config_word(dev, pos, &val);
__put_user(cpu_to_le16(val), (__le16 __user *) buf);
buf += 2;
pos += 2;
cnt -= 2;
}
if (cnt) {
unsigned char val;
pci_user_read_config_byte(dev, pos, &val);
__put_user(val, buf);
buf++;
pos++;
cnt--;
}
*ppos = pos;
return nbytes;
}
static ssize_t
proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos)
{
struct inode *ino = file->f_path.dentry->d_inode;
const struct proc_dir_entry *dp = PDE(ino);
struct pci_dev *dev = dp->data;
int pos = *ppos;
int size = dp->size;
int cnt;
if (pos >= size)
return 0;
if (nbytes >= size)
nbytes = size;
if (pos + nbytes > size)
nbytes = size - pos;
cnt = nbytes;
if (!access_ok(VERIFY_READ, buf, cnt))
return -EINVAL;
if ((pos & 1) && cnt) {
unsigned char val;
__get_user(val, buf);
pci_user_write_config_byte(dev, pos, val);
buf++;
pos++;
cnt--;
}
if ((pos & 3) && cnt > 2) {
__le16 val;
__get_user(val, (__le16 __user *) buf);
pci_user_write_config_word(dev, pos, le16_to_cpu(val));
buf += 2;
pos += 2;
cnt -= 2;
}
while (cnt >= 4) {
__le32 val;
__get_user(val, (__le32 __user *) buf);
pci_user_write_config_dword(dev, pos, le32_to_cpu(val));
buf += 4;
pos += 4;
cnt -= 4;
}
if (cnt >= 2) {
__le16 val;
__get_user(val, (__le16 __user *) buf);
pci_user_write_config_word(dev, pos, le16_to_cpu(val));
buf += 2;
pos += 2;
cnt -= 2;
}
if (cnt) {
unsigned char val;
__get_user(val, buf);
pci_user_write_config_byte(dev, pos, val);
buf++;
pos++;
cnt--;
}
*ppos = pos;
i_size_write(ino, dp->size);
return nbytes;
}
struct pci_filp_private {
enum pci_mmap_state mmap_state;
int write_combine;
};
static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
const struct proc_dir_entry *dp = PDE(file->f_dentry->d_inode);
struct pci_dev *dev = dp->data;
#ifdef HAVE_PCI_MMAP
struct pci_filp_private *fpriv = file->private_data;
#endif /* HAVE_PCI_MMAP */
int ret = 0;
lock_kernel();
switch (cmd) {
case PCIIOC_CONTROLLER:
ret = pci_domain_nr(dev->bus);
break;
#ifdef HAVE_PCI_MMAP
case PCIIOC_MMAP_IS_IO:
fpriv->mmap_state = pci_mmap_io;
break;
case PCIIOC_MMAP_IS_MEM:
fpriv->mmap_state = pci_mmap_mem;
break;
case PCIIOC_WRITE_COMBINE:
if (arg)
fpriv->write_combine = 1;
else
fpriv->write_combine = 0;
break;
#endif /* HAVE_PCI_MMAP */
default:
ret = -EINVAL;
break;
};
unlock_kernel();
return ret;
}
#ifdef HAVE_PCI_MMAP
static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_path.dentry->d_inode;
const struct proc_dir_entry *dp = PDE(inode);
struct pci_dev *dev = dp->data;
struct pci_filp_private *fpriv = file->private_data;
int i, ret;
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
/* Make sure the caller is mapping a real resource for this device */
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
if (pci_mmap_fits(dev, i, vma))
break;
}
if (i >= PCI_ROM_RESOURCE)
return -ENODEV;
ret = pci_mmap_page_range(dev, vma,
fpriv->mmap_state,
fpriv->write_combine);
if (ret < 0)
return ret;
return 0;
}
static int proc_bus_pci_open(struct inode *inode, struct file *file)
{
struct pci_filp_private *fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL);
if (!fpriv)
return -ENOMEM;
fpriv->mmap_state = pci_mmap_io;
fpriv->write_combine = 0;
file->private_data = fpriv;
return 0;
}
static int proc_bus_pci_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
file->private_data = NULL;
return 0;
}
#endif /* HAVE_PCI_MMAP */
static const struct file_operations proc_bus_pci_operations = {
.owner = THIS_MODULE,
.llseek = proc_bus_pci_lseek,
.read = proc_bus_pci_read,
.write = proc_bus_pci_write,
.unlocked_ioctl = proc_bus_pci_ioctl,
#ifdef HAVE_PCI_MMAP
.open = proc_bus_pci_open,
.release = proc_bus_pci_release,
.mmap = proc_bus_pci_mmap,
#ifdef HAVE_ARCH_PCI_GET_UNMAPPED_AREA
.get_unmapped_area = get_pci_unmapped_area,
#endif /* HAVE_ARCH_PCI_GET_UNMAPPED_AREA */
#endif /* HAVE_PCI_MMAP */
};
/* iterator */
static void *pci_seq_start(struct seq_file *m, loff_t *pos)
{
struct pci_dev *dev = NULL;
loff_t n = *pos;
for_each_pci_dev(dev) {
if (!n--)
break;
}
return dev;
}
static void *pci_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
struct pci_dev *dev = v;
(*pos)++;
dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
return dev;
}
static void pci_seq_stop(struct seq_file *m, void *v)
{
if (v) {
struct pci_dev *dev = v;
pci_dev_put(dev);
}
}
static int show_device(struct seq_file *m, void *v)
{
const struct pci_dev *dev = v;
const struct pci_driver *drv;
int i;
if (dev == NULL)
return 0;
drv = pci_dev_driver(dev);
seq_printf(m, "%02x%02x\t%04x%04x\t%x",
dev->bus->number,
dev->devfn,
dev->vendor,
dev->device,
dev->irq);
/* only print standard and ROM resources to preserve compatibility */
for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
resource_size_t start, end;
pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
seq_printf(m, "\t%16llx",
(unsigned long long)(start |
(dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
}
for (i = 0; i <= PCI_ROM_RESOURCE; i++) {
resource_size_t start, end;
pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
seq_printf(m, "\t%16llx",
dev->resource[i].start < dev->resource[i].end ?
(unsigned long long)(end - start) + 1 : 0);
}
seq_putc(m, '\t');
if (drv)
seq_printf(m, "%s", drv->name);
seq_putc(m, '\n');
return 0;
}
static const struct seq_operations proc_bus_pci_devices_op = {
.start = pci_seq_start,
.next = pci_seq_next,
.stop = pci_seq_stop,
.show = show_device
};
static struct proc_dir_entry *proc_bus_pci_dir;
int pci_proc_attach_device(struct pci_dev *dev)
{
struct pci_bus *bus = dev->bus;
struct proc_dir_entry *e;
char name[16];
if (!proc_initialized)
return -EACCES;
if (!bus->procdir) {
if (pci_proc_domain(bus)) {
sprintf(name, "%04x:%02x", pci_domain_nr(bus),
bus->number);
} else {
sprintf(name, "%02x", bus->number);
}
bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
if (!bus->procdir)
return -ENOMEM;
}
sprintf(name, "%02x.%x", PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn));
e = proc_create_data(name, S_IFREG | S_IRUGO | S_IWUSR, bus->procdir,
&proc_bus_pci_operations, dev);
if (!e)
return -ENOMEM;
e->size = dev->cfg_size;
dev->procent = e;
return 0;
}
int pci_proc_detach_device(struct pci_dev *dev)
{
struct proc_dir_entry *e;
if ((e = dev->procent)) {
remove_proc_entry(e->name, dev->bus->procdir);
dev->procent = NULL;
}
return 0;
}
#if 0
int pci_proc_attach_bus(struct pci_bus* bus)
{
struct proc_dir_entry *de = bus->procdir;
if (!proc_initialized)
return -EACCES;
if (!de) {
char name[16];
sprintf(name, "%02x", bus->number);
de = bus->procdir = proc_mkdir(name, proc_bus_pci_dir);
if (!de)
return -ENOMEM;
}
return 0;
}
#endif /* 0 */
int pci_proc_detach_bus(struct pci_bus* bus)
{
struct proc_dir_entry *de = bus->procdir;
if (de)
remove_proc_entry(de->name, proc_bus_pci_dir);
return 0;
}
static int proc_bus_pci_dev_open(struct inode *inode, struct file *file)
{
return seq_open(file, &proc_bus_pci_devices_op);
}
static const struct file_operations proc_bus_pci_dev_operations = {
.owner = THIS_MODULE,
.open = proc_bus_pci_dev_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release,
};
static int __init pci_proc_init(void)
{
struct pci_dev *dev = NULL;
proc_bus_pci_dir = proc_mkdir("bus/pci", NULL);
proc_create("devices", 0, proc_bus_pci_dir,
&proc_bus_pci_dev_operations);
proc_initialized = 1;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
pci_proc_attach_device(dev);
}
return 0;
}
device_initcall(pci_proc_init);