qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 2/3] VFIO driver for mediated PCI device


From: Alex Williamson
Subject: Re: [Qemu-devel] [PATCH 2/3] VFIO driver for mediated PCI device
Date: Tue, 21 Jun 2016 16:48:27 -0600

On Mon, 20 Jun 2016 22:01:47 +0530
Kirti Wankhede <address@hidden> wrote:

> VFIO driver registers with MDEV core driver. MDEV core driver creates
> mediated device and calls probe routine of MPCI VFIO driver. This MPCI
> VFIO driver adds mediated device to VFIO core module.
> Main aim of this module is to manage all VFIO APIs for each mediated PCI
> device.
> Those are:
> - get region information from vendor driver.
> - trap and emulate PCI config space and BAR region.
> - Send interrupt configuration information to vendor driver.
> - mmap mappable region with invalidate mapping and fault on access to
>   remap pfn.
> 
> Signed-off-by: Kirti Wankhede <address@hidden>
> Signed-off-by: Neo Jia <address@hidden>
> Change-Id: I583f4734752971d3d112324d69e2508c88f359ec
> ---
>  drivers/vfio/mdev/Kconfig           |   7 +
>  drivers/vfio/mdev/Makefile          |   1 +
>  drivers/vfio/mdev/vfio_mpci.c       | 654 
> ++++++++++++++++++++++++++++++++++++
>  drivers/vfio/pci/vfio_pci_private.h |   6 -
>  drivers/vfio/pci/vfio_pci_rdwr.c    |   1 +
>  include/linux/vfio.h                |   7 +
>  6 files changed, 670 insertions(+), 6 deletions(-)
>  create mode 100644 drivers/vfio/mdev/vfio_mpci.c
> 
> diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
> index 951e2bb06a3f..8d9e78aaa80f 100644
> --- a/drivers/vfio/mdev/Kconfig
> +++ b/drivers/vfio/mdev/Kconfig
> @@ -9,3 +9,10 @@ config MDEV
>  
>          If you don't know what do here, say N.
>  
> +config VFIO_MPCI
> +    tristate "VFIO support for Mediated PCI devices"
> +    depends on VFIO && PCI && MDEV
> +    default n
> +    help
> +        VFIO based driver for mediated PCI devices.
> +
> diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
> index 2c6d11f7bc24..cd5e7625e1ec 100644
> --- a/drivers/vfio/mdev/Makefile
> +++ b/drivers/vfio/mdev/Makefile
> @@ -2,4 +2,5 @@
>  mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
>  
>  obj-$(CONFIG_MDEV) += mdev.o
> +obj-$(CONFIG_VFIO_MPCI) += vfio_mpci.o
>  
> diff --git a/drivers/vfio/mdev/vfio_mpci.c b/drivers/vfio/mdev/vfio_mpci.c
> new file mode 100644
> index 000000000000..267879a05c39
> --- /dev/null
> +++ b/drivers/vfio/mdev/vfio_mpci.c
> @@ -0,0 +1,654 @@
> +/*
> + * VFIO based Mediated PCI device driver
> + *
> + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
> + *     Author: Neo Jia <address@hidden>
> + *          Kirti Wankhede <address@hidden>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#include <linux/init.h>
> +#include <linux/module.h>
> +#include <linux/device.h>
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/uuid.h>
> +#include <linux/vfio.h>
> +#include <linux/iommu.h>
> +#include <linux/mdev.h>
> +
> +#include "mdev_private.h"
> +
> +#define DRIVER_VERSION  "0.1"
> +#define DRIVER_AUTHOR   "NVIDIA Corporation"
> +#define DRIVER_DESC     "VFIO based Mediated PCI device driver"
> +
> +struct vfio_mdev {
> +     struct iommu_group *group;
> +     struct mdev_device *mdev;
> +     int                 refcnt;
> +     struct pci_region_info vfio_region_info[VFIO_PCI_NUM_REGIONS];
> +     u8                  *vconfig;
> +     struct mutex        vfio_mdev_lock;
> +};
> +
> +static int get_mdev_region_info(struct mdev_device *mdev,
> +                             struct pci_region_info *vfio_region_info,
> +                             int index)
> +{
> +     int ret = -EINVAL;
> +     struct parent_device *parent = mdev->parent;
> +
> +     if (parent && dev_is_pci(parent->dev) && parent->ops->get_region_info) {
> +             mutex_lock(&mdev->ops_lock);
> +             ret = parent->ops->get_region_info(mdev, index,
> +                                                 vfio_region_info);
> +             mutex_unlock(&mdev->ops_lock);

Why do we have two ops_lock, one on the parent_device and one on the
mdev_device?!  Is this one actually locking anything or also just
providing serialization?  Why do some things get serialized at the
parent level and some things at the device level?  Very confused by
ops_lock.

> +     }
> +     return ret;
> +}
> +
> +static void mdev_read_base(struct vfio_mdev *vmdev)
> +{
> +     int index, pos;
> +     u32 start_lo, start_hi;
> +     u32 mem_type;
> +
> +     pos = PCI_BASE_ADDRESS_0;
> +
> +     for (index = 0; index <= VFIO_PCI_BAR5_REGION_INDEX; index++) {
> +
> +             if (!vmdev->vfio_region_info[index].size)
> +                     continue;
> +
> +             start_lo = (*(u32 *)(vmdev->vconfig + pos)) &
> +                                     PCI_BASE_ADDRESS_MEM_MASK;
> +             mem_type = (*(u32 *)(vmdev->vconfig + pos)) &
> +                                     PCI_BASE_ADDRESS_MEM_TYPE_MASK;
> +
> +             switch (mem_type) {
> +             case PCI_BASE_ADDRESS_MEM_TYPE_64:
> +                     start_hi = (*(u32 *)(vmdev->vconfig + pos + 4));
> +                     pos += 4;
> +                     break;
> +             case PCI_BASE_ADDRESS_MEM_TYPE_32:
> +             case PCI_BASE_ADDRESS_MEM_TYPE_1M:
> +                     /* 1M mem BAR treated as 32-bit BAR */
> +             default:
> +                     /* mem unknown type treated as 32-bit BAR */
> +                     start_hi = 0;
> +                     break;
> +             }
> +             pos += 4;
> +             vmdev->vfio_region_info[index].start = ((u64)start_hi << 32) |
> +                                                     start_lo;
> +     }
> +}
> +
> +static int vfio_mpci_open(void *device_data)
> +{
> +     int ret = 0;
> +     struct vfio_mdev *vmdev = device_data;
> +
> +     if (!try_module_get(THIS_MODULE))
> +             return -ENODEV;
> +
> +     mutex_lock(&vmdev->vfio_mdev_lock);
> +     if (!vmdev->refcnt) {
> +             u8 *vconfig;
> +             int index;
> +             struct pci_region_info *cfg_reg;
> +
> +             for (index = VFIO_PCI_BAR0_REGION_INDEX;
> +                  index < VFIO_PCI_NUM_REGIONS; index++) {
> +                     ret = get_mdev_region_info(vmdev->mdev,
> +                                             &vmdev->vfio_region_info[index],
> +                                             index);
> +                     if (ret)
> +                             goto open_error;
> +             }
> +             cfg_reg = 
> &vmdev->vfio_region_info[VFIO_PCI_CONFIG_REGION_INDEX];
> +             if (!cfg_reg->size)
> +                     goto open_error;
> +
> +             vconfig = kzalloc(cfg_reg->size, GFP_KERNEL);
> +             if (IS_ERR(vconfig)) {
> +                     ret = PTR_ERR(vconfig);
> +                     goto open_error;
> +             }
> +
> +             vmdev->vconfig = vconfig;
> +     }
> +
> +     vmdev->refcnt++;
> +open_error:
> +
> +     mutex_unlock(&vmdev->vfio_mdev_lock);
> +     if (ret)
> +             module_put(THIS_MODULE);
> +
> +     return ret;
> +}
> +
> +static void vfio_mpci_close(void *device_data)
> +{
> +     struct vfio_mdev *vmdev = device_data;
> +
> +     mutex_lock(&vmdev->vfio_mdev_lock);
> +     vmdev->refcnt--;
> +     if (!vmdev->refcnt) {
> +             memset(&vmdev->vfio_region_info, 0,
> +                     sizeof(vmdev->vfio_region_info));
> +             kfree(vmdev->vconfig);
> +     }
> +     mutex_unlock(&vmdev->vfio_mdev_lock);
> +     module_put(THIS_MODULE);
> +}
> +
> +static int mdev_get_irq_count(struct vfio_mdev *vmdev, int irq_type)
> +{
> +     /* Don't support MSIX for now */
> +     if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
> +             return -1;
> +
> +     return 1;

Too much hard coding here, the mediated driver should define this.

> +}
> +
> +static long vfio_mpci_unlocked_ioctl(void *device_data,
> +                                  unsigned int cmd, unsigned long arg)
> +{
> +     int ret = 0;
> +     struct vfio_mdev *vmdev = device_data;
> +     unsigned long minsz;
> +
> +     switch (cmd) {
> +     case VFIO_DEVICE_GET_INFO:
> +     {
> +             struct vfio_device_info info;
> +
> +             minsz = offsetofend(struct vfio_device_info, num_irqs);
> +
> +             if (copy_from_user(&info, (void __user *)arg, minsz))
> +                     return -EFAULT;
> +
> +             if (info.argsz < minsz)
> +                     return -EINVAL;
> +
> +             info.flags = VFIO_DEVICE_FLAGS_PCI;
> +             info.num_regions = VFIO_PCI_NUM_REGIONS;
> +             info.num_irqs = VFIO_PCI_NUM_IRQS;
> +
> +             return copy_to_user((void __user *)arg, &info, minsz);
> +     }
> +     case VFIO_DEVICE_GET_REGION_INFO:
> +     {
> +             struct vfio_region_info info;
> +
> +             minsz = offsetofend(struct vfio_region_info, offset);
> +
> +             if (copy_from_user(&info, (void __user *)arg, minsz))
> +                     return -EFAULT;
> +
> +             if (info.argsz < minsz)
> +                     return -EINVAL;
> +
> +             switch (info.index) {
> +             case VFIO_PCI_CONFIG_REGION_INDEX:
> +             case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
> +                     info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
> +                     info.size = vmdev->vfio_region_info[info.index].size;
> +                     if (!info.size) {
> +                             info.flags = 0;
> +                             break;
> +                     }
> +
> +                     info.flags = vmdev->vfio_region_info[info.index].flags;
> +                     break;
> +             case VFIO_PCI_VGA_REGION_INDEX:
> +             case VFIO_PCI_ROM_REGION_INDEX:
> +             default:
> +                     return -EINVAL;
> +             }
> +
> +             return copy_to_user((void __user *)arg, &info, minsz);
> +     }
> +     case VFIO_DEVICE_GET_IRQ_INFO:
> +     {
> +             struct vfio_irq_info info;
> +
> +             minsz = offsetofend(struct vfio_irq_info, count);
> +
> +             if (copy_from_user(&info, (void __user *)arg, minsz))
> +                     return -EFAULT;
> +
> +             if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
> +                     return -EINVAL;
> +
> +             switch (info.index) {
> +             case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSI_IRQ_INDEX:
> +             case VFIO_PCI_REQ_IRQ_INDEX:
> +                     break;
> +                     /* pass thru to return error */
> +             case VFIO_PCI_MSIX_IRQ_INDEX:
> +             default:
> +                     return -EINVAL;
> +             }
> +
> +             info.count = VFIO_PCI_NUM_IRQS;

???  This is set again 2 lines below

> +             info.flags = VFIO_IRQ_INFO_EVENTFD;
> +             info.count = mdev_get_irq_count(vmdev, info.index);
> +
> +             if (info.count == -1)
> +                     return -EINVAL;
> +
> +             if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
> +                     info.flags |= (VFIO_IRQ_INFO_MASKABLE |
> +                                     VFIO_IRQ_INFO_AUTOMASKED);
> +             else
> +                     info.flags |= VFIO_IRQ_INFO_NORESIZE;
> +
> +             return copy_to_user((void __user *)arg, &info, minsz);
> +     }
> +     case VFIO_DEVICE_SET_IRQS:
> +     {
> +             struct vfio_irq_set hdr;
> +             struct mdev_device *mdev = vmdev->mdev;
> +             struct parent_device *parent = vmdev->mdev->parent;
> +             u8 *data = NULL, *ptr = NULL;
> +
> +             minsz = offsetofend(struct vfio_irq_set, count);
> +
> +             if (copy_from_user(&hdr, (void __user *)arg, minsz))
> +                     return -EFAULT;
> +
> +             if (hdr.argsz < minsz || hdr.index >= VFIO_PCI_NUM_IRQS ||
> +                 hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
> +                 VFIO_IRQ_SET_ACTION_TYPE_MASK))
> +                     return -EINVAL;
> +
> +             if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
> +                     size_t size;
> +                     int max = mdev_get_irq_count(vmdev, hdr.index);
> +
> +                     if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
> +                             size = sizeof(uint8_t);
> +                     else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
> +                             size = sizeof(int32_t);
> +                     else
> +                             return -EINVAL;
> +
> +                     if (hdr.argsz - minsz < hdr.count * size ||
> +                         hdr.start >= max || hdr.start + hdr.count > max)
> +                             return -EINVAL;
> +
> +                     ptr = data = memdup_user((void __user *)(arg + minsz),
> +                                              hdr.count * size);
> +                     if (IS_ERR(data))
> +                             return PTR_ERR(data);
> +             }
> +
> +             if (parent && parent->ops->set_irqs) {
> +                     mutex_lock(&mdev->ops_lock);
> +                     ret = parent->ops->set_irqs(mdev, hdr.flags, hdr.index,
> +                                                 hdr.start, hdr.count, data);
> +                     mutex_unlock(&mdev->ops_lock);

Device level serialization on set_irqs... interesting.

> +             }
> +
> +             kfree(ptr);
> +             return ret;
> +     }
> +     }
> +     return -ENOTTY;
> +}
> +
> +ssize_t mdev_dev_config_rw(struct vfio_mdev *vmdev, char __user *buf,
> +                        size_t count, loff_t *ppos, bool iswrite)
> +{
> +     struct mdev_device *mdev = vmdev->mdev;
> +     struct parent_device *parent = mdev->parent;
> +     int size = vmdev->vfio_region_info[VFIO_PCI_CONFIG_REGION_INDEX].size;
> +     int ret = 0;
> +     uint64_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
> +
> +     if (pos < 0 || pos >= size ||
> +         pos + count > size) {
> +             pr_err("%s pos 0x%llx out of range\n", __func__, pos);
> +             ret = -EFAULT;
> +             goto config_rw_exit;
> +     }
> +
> +     if (iswrite) {
> +             char *usr_data, *ptr;
> +
> +             ptr = usr_data = memdup_user(buf, count);
> +             if (IS_ERR(usr_data)) {
> +                     ret = PTR_ERR(usr_data);
> +                     goto config_rw_exit;
> +             }
> +
> +             ret = parent->ops->write(mdev, usr_data, count,
> +                                       EMUL_CONFIG_SPACE, pos);

No serialization on this ops, thank goodness, but why?

This read/write interface still seems strange to me...

> +
> +             memcpy((void *)(vmdev->vconfig + pos), (void *)usr_data, count);
> +             kfree(ptr);
> +     } else {
> +             char *ret_data, *ptr;
> +
> +             ptr = ret_data = kzalloc(count, GFP_KERNEL);
> +
> +             if (IS_ERR(ret_data)) {
> +                     ret = PTR_ERR(ret_data);
> +                     goto config_rw_exit;
> +             }
> +
> +             ret = parent->ops->read(mdev, ret_data, count,
> +                                     EMUL_CONFIG_SPACE, pos);
> +
> +             if (ret > 0) {
> +                     if (copy_to_user(buf, ret_data, ret))
> +                             ret = -EFAULT;
> +                     else
> +                             memcpy((void *)(vmdev->vconfig + pos),
> +                                     (void *)ret_data, count);
> +             }
> +             kfree(ptr);

So vconfig caches all of config space for the mdev, but we only ever
use it to read the BAR address via mdev_read_base()... why?  I hope the
mdev driver doesn't freak out if the user reads the mmio region before
writing a base address (remember the vfio API aspect of the interface
doesn't necessarily follow the VM PCI programming API)

> +     }
> +config_rw_exit:
> +
> +     if (ret > 0)
> +             *ppos += ret;
> +
> +     return ret;
> +}
> +
> +ssize_t mdev_dev_bar_rw(struct vfio_mdev *vmdev, char __user *buf,
> +                     size_t count, loff_t *ppos, bool iswrite)
> +{
> +     struct mdev_device *mdev = vmdev->mdev;
> +     struct parent_device *parent = mdev->parent;
> +     loff_t offset = *ppos & VFIO_PCI_OFFSET_MASK;
> +     loff_t pos;
> +     int bar_index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
> +     int ret = 0;
> +
> +     if (!vmdev->vfio_region_info[bar_index].start)
> +             mdev_read_base(vmdev);
> +
> +     if (offset >= vmdev->vfio_region_info[bar_index].size) {
> +             ret = -EINVAL;
> +             goto bar_rw_exit;
> +     }
> +
> +     count = min(count,
> +                 (size_t)(vmdev->vfio_region_info[bar_index].size - offset));
> +
> +     pos = vmdev->vfio_region_info[bar_index].start + offset;

In the case of a mpci dev, @start is the vconfig BAR value, so it's
user (guest) writable, and the mediated driver is supposed to
understand that?  I suppose is saw the config write too, if there was
one, but the mediated driver gives us region info based on region index.
We have the region index here.  Why wouldn't we do reads and writes
based on region index and offset and eliminate vconfig?  Seems like
that would consolidate a lot of this, we don't care what we're reading
and writing, just pass it through.  Mediated pci drivers would simply
need to match indexes to those already defined for vfio-pci.

> +
> +     if (iswrite) {
> +             char *usr_data, *ptr;
> +
> +             ptr = usr_data = memdup_user(buf, count);
> +             if (IS_ERR(usr_data)) {
> +                     ret = PTR_ERR(usr_data);
> +                     goto bar_rw_exit;
> +             }
> +
> +             ret = parent->ops->write(mdev, usr_data, count, EMUL_MMIO, pos);
> +
> +             kfree(ptr);
> +     } else {
> +             char *ret_data, *ptr;
> +
> +             ptr = ret_data = kzalloc(count, GFP_KERNEL);
> +
> +             if (!ret_data) {
> +                     ret = -ENOMEM;
> +                     goto bar_rw_exit;
> +             }
> +
> +             ret = parent->ops->read(mdev, ret_data, count, EMUL_MMIO, pos);
> +
> +             if (ret > 0) {
> +                     if (copy_to_user(buf, ret_data, ret))
> +                             ret = -EFAULT;
> +             }
> +             kfree(ptr);
> +     }
> +
> +bar_rw_exit:
> +
> +     if (ret > 0)
> +             *ppos += ret;
> +
> +     return ret;
> +}
> +
> +
> +static ssize_t mdev_dev_rw(void *device_data, char __user *buf,
> +                        size_t count, loff_t *ppos, bool iswrite)
> +{
> +     unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
> +     struct vfio_mdev *vmdev = device_data;
> +
> +     if (index >= VFIO_PCI_NUM_REGIONS)
> +             return -EINVAL;
> +
> +     switch (index) {
> +     case VFIO_PCI_CONFIG_REGION_INDEX:
> +             return mdev_dev_config_rw(vmdev, buf, count, ppos, iswrite);
> +
> +     case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
> +             return mdev_dev_bar_rw(vmdev, buf, count, ppos, iswrite);
> +
> +     case VFIO_PCI_ROM_REGION_INDEX:
> +     case VFIO_PCI_VGA_REGION_INDEX:
> +             break;
> +     }
> +
> +     return -EINVAL;
> +}
> +
> +
> +static ssize_t vfio_mpci_read(void *device_data, char __user *buf,
> +                           size_t count, loff_t *ppos)
> +{
> +     struct vfio_mdev *vmdev = device_data;
> +     struct mdev_device *mdev = vmdev->mdev;
> +     struct parent_device *parent = mdev->parent;
> +     int ret = 0;
> +
> +     if (!count)
> +             return 0;
> +
> +     if (IS_ERR_OR_NULL(buf))
> +             return -EINVAL;
> +
> +     if (parent && parent->ops->read) {
> +             mutex_lock(&mdev->ops_lock);
> +             ret = mdev_dev_rw(device_data, buf, count, ppos, false);
> +             mutex_unlock(&mdev->ops_lock);
> +     }

Argh, we do serialize reads and writes per device, why?!

> +
> +     return ret;
> +}
> +
> +static ssize_t vfio_mpci_write(void *device_data, const char __user *buf,
> +                            size_t count, loff_t *ppos)
> +{
> +     struct vfio_mdev *vmdev = device_data;
> +     struct mdev_device *mdev = vmdev->mdev;
> +     struct parent_device *parent = mdev->parent;
> +     int ret = 0;
> +
> +     if (!count)
> +             return 0;
> +
> +     if (IS_ERR_OR_NULL(buf))
> +             return -EINVAL;
> +
> +     if (parent && parent->ops->write) {
> +             mutex_lock(&mdev->ops_lock);
> +             ret = mdev_dev_rw(device_data, (char __user *)buf, count,
> +                               ppos, true);
> +             mutex_unlock(&mdev->ops_lock);
> +     }
> +
> +     return ret;
> +}
> +
> +static int mdev_dev_mmio_fault(struct vm_area_struct *vma, struct vm_fault 
> *vmf)
> +{
> +     int ret;
> +     struct vfio_mdev *vmdev = vma->vm_private_data;
> +     struct mdev_device *mdev;
> +     struct parent_device *parent;
> +     u64 virtaddr = (u64)vmf->virtual_address;
> +     u64 offset, phyaddr;
> +     unsigned long req_size, pgoff;
> +     pgprot_t pg_prot;
> +
> +     if (!vmdev && !vmdev->mdev)
> +             return -EINVAL;
> +
> +     mdev = vmdev->mdev;
> +     parent  = mdev->parent;
> +
> +     offset   = virtaddr - vma->vm_start;
> +     phyaddr  = (vma->vm_pgoff << PAGE_SHIFT) + offset;
> +     pgoff    = phyaddr >> PAGE_SHIFT;
> +     req_size = vma->vm_end - virtaddr;
> +     pg_prot  = vma->vm_page_prot;
> +
> +     if (parent && parent->ops->validate_map_request) {
> +             mutex_lock(&mdev->ops_lock);
> +             ret = parent->ops->validate_map_request(mdev, virtaddr,
> +                                                      &pgoff, &req_size,
> +                                                      &pg_prot);
> +             mutex_unlock(&mdev->ops_lock);
> +             if (ret)
> +                     return ret;
> +
> +             if (!req_size)
> +                     return -EINVAL;
> +     }
> +
> +     ret = remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
> +
> +     return ret | VM_FAULT_NOPAGE;
> +}
> +
> +static const struct vm_operations_struct mdev_dev_mmio_ops = {
> +     .fault = mdev_dev_mmio_fault,
> +};
> +
> +
> +static int vfio_mpci_mmap(void *device_data, struct vm_area_struct *vma)
> +{
> +     unsigned int index;
> +     struct vfio_mdev *vmdev = device_data;
> +     struct mdev_device *mdev = vmdev->mdev;
> +     struct pci_dev *pdev;
> +     unsigned long pgoff;
> +     loff_t offset;
> +
> +     if (!mdev->parent || !dev_is_pci(mdev->parent->dev))
> +             return -EINVAL;
> +
> +     pdev = to_pci_dev(mdev->parent->dev);
> +
> +     offset = vma->vm_pgoff << PAGE_SHIFT;
> +
> +     index = VFIO_PCI_OFFSET_TO_INDEX(offset);
> +
> +     if (index >= VFIO_PCI_ROM_REGION_INDEX)
> +             return -EINVAL;
> +
> +     pgoff = vma->vm_pgoff &
> +             ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
> +
> +     vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff;
> +
> +     vma->vm_private_data = vmdev;
> +     vma->vm_ops = &mdev_dev_mmio_ops;
> +
> +     return 0;
> +}
> +
> +static const struct vfio_device_ops vfio_mpci_dev_ops = {
> +     .name           = "vfio-mpci",
> +     .open           = vfio_mpci_open,
> +     .release        = vfio_mpci_close,
> +     .ioctl          = vfio_mpci_unlocked_ioctl,
> +     .read           = vfio_mpci_read,
> +     .write          = vfio_mpci_write,
> +     .mmap           = vfio_mpci_mmap,
> +};
> +
> +int vfio_mpci_probe(struct device *dev)
> +{
> +     struct vfio_mdev *vmdev;
> +     struct mdev_device *mdev = to_mdev_device(dev);
> +     int ret;
> +
> +     if (!mdev)
> +             return -EINVAL;

How could that happen?

> +
> +     vmdev = kzalloc(sizeof(*vmdev), GFP_KERNEL);
> +     if (IS_ERR(vmdev))
> +             return PTR_ERR(vmdev);
> +
> +     vmdev->mdev = mdev_get_device(mdev);
> +     vmdev->group = mdev->group;
> +     mutex_init(&vmdev->vfio_mdev_lock);
> +
> +     ret = vfio_add_group_dev(dev, &vfio_mpci_dev_ops, vmdev);
> +     if (ret)
> +             kfree(vmdev);
> +
> +     mdev_put_device(mdev);
> +     return ret;
> +}
> +
> +void vfio_mpci_remove(struct device *dev)
> +{
> +     struct vfio_mdev *vmdev;
> +
> +     vmdev = vfio_del_group_dev(dev);
> +     kfree(vmdev);
> +}
> +
> +int vfio_mpci_match(struct device *dev)
> +{
> +     if (dev_is_pci(dev->parent))
> +             return 1;
> +
> +     return 0;
> +}
> +
> +struct mdev_driver vfio_mpci_driver = {
> +     .name   = "vfio_mpci",
> +     .probe  = vfio_mpci_probe,
> +     .remove = vfio_mpci_remove,
> +     .match  = vfio_mpci_match,
> +};
> +
> +static int __init vfio_mpci_init(void)
> +{
> +     return mdev_register_driver(&vfio_mpci_driver, THIS_MODULE);
> +}
> +
> +static void __exit vfio_mpci_exit(void)
> +{
> +     mdev_unregister_driver(&vfio_mpci_driver);
> +}
> +
> +module_init(vfio_mpci_init)
> +module_exit(vfio_mpci_exit)
> +
> +MODULE_VERSION(DRIVER_VERSION);
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR(DRIVER_AUTHOR);
> +MODULE_DESCRIPTION(DRIVER_DESC);
> diff --git a/drivers/vfio/pci/vfio_pci_private.h 
> b/drivers/vfio/pci/vfio_pci_private.h
> index 8a7d546d18a0..04a450908ffb 100644
> --- a/drivers/vfio/pci/vfio_pci_private.h
> +++ b/drivers/vfio/pci/vfio_pci_private.h
> @@ -19,12 +19,6 @@
>  #ifndef VFIO_PCI_PRIVATE_H
>  #define VFIO_PCI_PRIVATE_H
>  
> -#define VFIO_PCI_OFFSET_SHIFT   40
> -
> -#define VFIO_PCI_OFFSET_TO_INDEX(off)        (off >> VFIO_PCI_OFFSET_SHIFT)
> -#define VFIO_PCI_INDEX_TO_OFFSET(index)      ((u64)(index) << 
> VFIO_PCI_OFFSET_SHIFT)
> -#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
> -
>  /* Special capability IDs predefined access */
>  #define PCI_CAP_ID_INVALID           0xFF    /* default raw access */
>  #define PCI_CAP_ID_INVALID_VIRT              0xFE    /* default virt access 
> */
> diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c 
> b/drivers/vfio/pci/vfio_pci_rdwr.c
> index 5ffd1d9ad4bd..5b912be9d9c3 100644
> --- a/drivers/vfio/pci/vfio_pci_rdwr.c
> +++ b/drivers/vfio/pci/vfio_pci_rdwr.c
> @@ -18,6 +18,7 @@
>  #include <linux/uaccess.h>
>  #include <linux/io.h>
>  #include <linux/vgaarb.h>
> +#include <linux/vfio.h>
>  
>  #include "vfio_pci_private.h"
>  
> diff --git a/include/linux/vfio.h b/include/linux/vfio.h
> index 0ecae0b1cd34..431b824b0d3e 100644
> --- a/include/linux/vfio.h
> +++ b/include/linux/vfio.h
> @@ -18,6 +18,13 @@
>  #include <linux/poll.h>
>  #include <uapi/linux/vfio.h>
>  
> +#define VFIO_PCI_OFFSET_SHIFT   40
> +
> +#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
> +#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << 
> VFIO_PCI_OFFSET_SHIFT)
> +#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
> +
> +
>  /**
>   * struct vfio_device_ops - VFIO bus driver device callbacks
>   *




reply via email to

[Prev in Thread] Current Thread [Next in Thread]