qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v7 2/4] vfio: VFIO driver for mediated devices


From: Kirti Wankhede
Subject: [Qemu-devel] [PATCH v7 2/4] vfio: VFIO driver for mediated devices
Date: Thu, 25 Aug 2016 09:23:53 +0530

The VFIO MDEV driver registers with the MDEV core driver. The MDEV core driver
creates a mediated device and calls the probe routine of the MPCI VFIO driver.
This driver adds the mediated device to the VFIO core module.
The main aim of this module is to manage all VFIO APIs for each mediated
device. Those are:
- Get VFIO device information about the type of device, the maximum number of
  regions and the maximum number of interrupts supported.
- Get region information from the vendor driver.
- Get interrupt information and send interrupt configuration information to
  the vendor driver.
- Device reset.
- Trap and forward read/write for emulated regions.

Signed-off-by: Kirti Wankhede <address@hidden>
Signed-off-by: Neo Jia <address@hidden>
Change-Id: I583f4734752971d3d112324d69e2508c88f359ec
Reviewed-on: http://git-master/r/1175706
Reviewed-by: Automatic_Commit_Validation_User
---
 drivers/vfio/mdev/Kconfig           |   6 +
 drivers/vfio/mdev/Makefile          |   1 +
 drivers/vfio/mdev/vfio_mdev.c       | 467 ++++++++++++++++++++++++++++++++++++
 drivers/vfio/pci/vfio_pci_private.h |   6 +-
 4 files changed, 477 insertions(+), 3 deletions(-)
 create mode 100644 drivers/vfio/mdev/vfio_mdev.c

diff --git a/drivers/vfio/mdev/Kconfig b/drivers/vfio/mdev/Kconfig
index a34fbc66f92f..703abd0a9bff 100644
--- a/drivers/vfio/mdev/Kconfig
+++ b/drivers/vfio/mdev/Kconfig
@@ -9,4 +9,10 @@ config VFIO_MDEV
 
         If you don't know what do here, say N.
 
+config VFIO_MDEV_DEVICE
+    tristate "VFIO support for Mediated devices"
+    depends on VFIO && VFIO_MDEV
+    default n
+    help
+        VFIO based driver for mediated devices.
 
diff --git a/drivers/vfio/mdev/Makefile b/drivers/vfio/mdev/Makefile
index 56a75e689582..e5087ed83a34 100644
--- a/drivers/vfio/mdev/Makefile
+++ b/drivers/vfio/mdev/Makefile
@@ -2,4 +2,5 @@
 mdev-y := mdev_core.o mdev_sysfs.o mdev_driver.o
 
 obj-$(CONFIG_VFIO_MDEV) += mdev.o
+obj-$(CONFIG_VFIO_MDEV_DEVICE) += vfio_mdev.o
 
diff --git a/drivers/vfio/mdev/vfio_mdev.c b/drivers/vfio/mdev/vfio_mdev.c
new file mode 100644
index 000000000000..28f13aeaa46b
--- /dev/null
+++ b/drivers/vfio/mdev/vfio_mdev.c
@@ -0,0 +1,467 @@
+/*
+ * VFIO based Mediated PCI device driver
+ *
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *     Author: Neo Jia <address@hidden>
+ *            Kirti Wankhede <address@hidden>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/vfio.h>
+#include <linux/iommu.h>
+#include <linux/mdev.h>
+
+#include "mdev_private.h"
+
+#define DRIVER_VERSION  "0.1"
+#define DRIVER_AUTHOR   "NVIDIA Corporation"
+#define DRIVER_DESC     "VFIO based Mediated PCI device driver"
+
+/* Per-device private data handed to the VFIO core as device_data. */
+struct vfio_mdev {
+       struct iommu_group *group;      /* copied from mdev->group at probe */
+       struct mdev_device *mdev;       /* mediated device this instance wraps */
+       /*
+        * Copy of the last VFIO_DEVICE_GET_INFO result; its num_irqs field is
+        * used to bound-check the index in the IRQ ioctls.
+        */
+       struct vfio_device_info dev_info;
+};
+
+/*
+ * Open callback: take a reference on this module for as long as the device
+ * is open.  Returns 0 on success or -ENODEV if the reference cannot be taken.
+ */
+static int vfio_mdev_open(void *device_data)
+{
+       if (try_module_get(THIS_MODULE))
+               return 0;
+
+       return -ENODEV;
+}
+
+/* Release callback: drop the module reference taken by vfio_mdev_open(). */
+static void vfio_mdev_close(void *device_data)
+{
+       module_put(THIS_MODULE);
+}
+
+/*
+ * Append a sparse-mmap capability to @caps.
+ * @caps: capability chain being built for the region info reply
+ * @cap_type: struct vfio_region_info_cap_sparse_mmap supplied by the vendor
+ *            driver; nr_areas and the areas[] array are copied from it
+ *
+ * Returns 0 on success or the error encoded by vfio_info_cap_add().
+ */
+static int sparse_mmap_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+       struct vfio_region_info_cap_sparse_mmap *src = cap_type;
+       struct vfio_region_info_cap_sparse_mmap *dst;
+       struct vfio_info_cap_header *hdr;
+       size_t areas_len = src->nr_areas * sizeof(*src->areas);
+
+       hdr = vfio_info_cap_add(caps, sizeof(*src) + areas_len,
+                               VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+       if (IS_ERR(hdr))
+               return PTR_ERR(hdr);
+
+       /* The header is embedded in the capability; recover the container. */
+       dst = container_of(hdr, struct vfio_region_info_cap_sparse_mmap,
+                          header);
+       dst->nr_areas = src->nr_areas;
+       memcpy(dst->areas, src->areas, areas_len);
+       return 0;
+}
+
+/*
+ * Append a region-type capability to @caps.
+ * @caps: capability chain being built for the region info reply
+ * @cap_type: struct vfio_region_info_cap_type supplied by the vendor driver
+ *
+ * Returns 0 on success or the error encoded by vfio_info_cap_add().
+ */
+static int region_type_cap(struct vfio_info_cap *caps, void *cap_type)
+{
+       struct vfio_info_cap_header *header;
+       struct vfio_region_info_cap_type *type_cap, *cap = cap_type;
+
+       header = vfio_info_cap_add(caps, sizeof(*cap),
+                                  VFIO_REGION_INFO_CAP_TYPE, 1);
+       if (IS_ERR(header))
+               return PTR_ERR(header);
+
+       type_cap = container_of(header, struct vfio_region_info_cap_type,
+                               header);
+       type_cap->type = cap->type;
+       /* subtype must come from the vendor's subtype, not from its type */
+       type_cap->subtype = cap->subtype;
+       return 0;
+}
+
+/*
+ * vfio_mdev_unlocked_ioctl - dispatch VFIO device ioctls to the vendor driver
+ * @device_data: struct vfio_mdev for this device
+ * @cmd: VFIO_DEVICE_* ioctl number
+ * @arg: userspace pointer to the ioctl argument structure
+ *
+ * Each supported command copies the fixed-size header from userspace, checks
+ * argsz against that header size and forwards the request to the matching
+ * callback in parent->ops.
+ *
+ * Returns 0 on success, -EFAULT when a user copy fails, -EINVAL for bad
+ * arguments or a missing vendor callback, -ENOTTY for an unknown command, or
+ * whatever error the vendor callback returned.
+ */
+static long vfio_mdev_unlocked_ioctl(void *device_data,
+                                    unsigned int cmd, unsigned long arg)
+{
+       int ret = 0;
+       struct vfio_mdev *vmdev = device_data;
+       struct parent_device *parent = vmdev->mdev->parent;
+       unsigned long minsz;
+
+       switch (cmd) {
+       case VFIO_DEVICE_GET_INFO:
+       {
+               struct vfio_device_info info;
+
+               minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+               if (copy_from_user(&info, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if (info.argsz < minsz)
+                       return -EINVAL;
+
+               if (parent->ops->get_device_info)
+                       ret = parent->ops->get_device_info(vmdev->mdev, &info);
+               else
+                       return -EINVAL;
+
+               if (ret)
+                       return ret;
+
+               if (parent->ops->reset)
+                       info.flags |= VFIO_DEVICE_FLAGS_RESET;
+
+               /* Cached so the IRQ ioctls can bound-check against num_irqs. */
+               memcpy(&vmdev->dev_info, &info, sizeof(info));
+
+               /*
+                * copy_to_user() returns the number of bytes left uncopied,
+                * not an errno, so translate any failure to -EFAULT.
+                */
+               return copy_to_user((void __user *)arg, &info, minsz) ?
+                       -EFAULT : 0;
+       }
+       case VFIO_DEVICE_GET_REGION_INFO:
+       {
+               struct vfio_region_info info;
+               struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+               u16 cap_type_id = 0;
+               void *cap_type = NULL;
+
+               minsz = offsetofend(struct vfio_region_info, offset);
+
+               if (copy_from_user(&info, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if (info.argsz < minsz)
+                       return -EINVAL;
+
+               if (parent->ops->get_region_info)
+                       ret = parent->ops->get_region_info(vmdev->mdev, &info,
+                                                      &cap_type_id, &cap_type);
+               else
+                       return -EINVAL;
+
+               if (ret)
+                       return ret;
+
+               /* Build the capability chain the vendor driver asked for. */
+               if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && cap_type) {
+                       switch (cap_type_id) {
+                       case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
+                               ret = sparse_mmap_cap(&caps, cap_type);
+                               if (ret)
+                                       return ret;
+                               break;
+
+                       case VFIO_REGION_INFO_CAP_TYPE:
+                               ret = region_type_cap(&caps, cap_type);
+                               if (ret)
+                                       return ret;
+                               break;
+                       default:
+                               return -EINVAL;
+                       }
+               }
+
+               if (caps.size) {
+                       if (info.argsz < sizeof(info) + caps.size) {
+                               /*
+                                * User buffer too small for the chain: report
+                                * the size needed and return no capabilities.
+                                */
+                               info.argsz = sizeof(info) + caps.size;
+                               info.cap_offset = 0;
+                       } else {
+                               vfio_info_cap_shift(&caps, sizeof(info));
+                               if (copy_to_user((void __user *)arg +
+                                                       sizeof(info), caps.buf,
+                                                       caps.size)) {
+                                       kfree(caps.buf);
+                                       return -EFAULT;
+                               }
+                               info.cap_offset = sizeof(info);
+                       }
+                       kfree(caps.buf);
+               }
+
+               return copy_to_user((void __user *)arg, &info, minsz) ?
+                       -EFAULT : 0;
+       }
+       case VFIO_DEVICE_GET_IRQ_INFO:
+       {
+               struct vfio_irq_info info;
+
+               minsz = offsetofend(struct vfio_irq_info, count);
+
+               if (copy_from_user(&info, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if ((info.argsz < minsz) ||
+                   (info.index >= vmdev->dev_info.num_irqs))
+                       return -EINVAL;
+
+               if (parent->ops->get_irq_info)
+                       ret = parent->ops->get_irq_info(vmdev->mdev, &info);
+               else
+                       return -EINVAL;
+
+               if (ret)
+                       return ret;
+
+               if (info.count == -1)
+                       return -EINVAL;
+
+               return copy_to_user((void __user *)arg, &info, minsz) ?
+                       -EFAULT : 0;
+       }
+       case VFIO_DEVICE_SET_IRQS:
+       {
+               struct vfio_irq_set hdr;
+               u8 *data = NULL, *ptr = NULL;
+
+               minsz = offsetofend(struct vfio_irq_set, count);
+
+               if (copy_from_user(&hdr, (void __user *)arg, minsz))
+                       return -EFAULT;
+
+               if ((hdr.argsz < minsz) ||
+                   (hdr.index >= vmdev->dev_info.num_irqs) ||
+                   (hdr.flags & ~(VFIO_IRQ_SET_DATA_TYPE_MASK |
+                                 VFIO_IRQ_SET_ACTION_TYPE_MASK)))
+                       return -EINVAL;
+
+               if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
+                       size_t size;
+
+                       if (hdr.flags & VFIO_IRQ_SET_DATA_BOOL)
+                               size = sizeof(uint8_t);
+                       else if (hdr.flags & VFIO_IRQ_SET_DATA_EVENTFD)
+                               size = sizeof(int32_t);
+                       else
+                               return -EINVAL;
+
+                       /* divide rather than multiply: count * size can wrap */
+                       if (hdr.count > (hdr.argsz - minsz) / size)
+                               return -EINVAL;
+
+                       ptr = data = memdup_user((void __user *)(arg + minsz),
+                                                hdr.count * size);
+                       if (IS_ERR(data))
+                               return PTR_ERR(data);
+               }
+
+               if (parent->ops->set_irqs)
+                       ret = parent->ops->set_irqs(vmdev->mdev, hdr.flags,
+                                                   hdr.index, hdr.start,
+                                                   hdr.count, data);
+               else
+                       ret = -EINVAL;
+
+               /* ptr is NULL for DATA_NONE; kfree(NULL) is a no-op. */
+               kfree(ptr);
+               return ret;
+       }
+       case VFIO_DEVICE_RESET:
+       {
+               if (parent->ops->reset)
+                       return parent->ops->reset(vmdev->mdev);
+
+               return -EINVAL;
+       }
+       }
+       return -ENOTTY;
+}
+
+/*
+ * vfio_mdev_read - read from the device through the vendor driver
+ * @device_data: struct vfio_mdev for this device
+ * @buf: userspace destination buffer
+ * @count: number of bytes requested
+ * @ppos: file position; advanced by the number of bytes transferred
+ *
+ * The request is split into naturally aligned 4-, 2- or 1-byte accesses,
+ * each forwarded to parent->ops->read and then copied out to userspace.
+ *
+ * Returns the number of bytes read, -EINVAL if the vendor driver has no read
+ * callback, the vendor callback's negative error code, or -EFAULT if a user
+ * copy fails or the callback reports zero bytes.
+ */
+static ssize_t vfio_mdev_read(void *device_data, char __user *buf,
+                             size_t count, loff_t *ppos)
+{
+       struct vfio_mdev *vmdev = device_data;
+       struct mdev_device *mdev = vmdev->mdev;
+       struct parent_device *parent = mdev->parent;
+       ssize_t done = 0;
+       int ret;
+
+       if (!parent->ops->read)
+               return -EINVAL;
+
+       while (count) {
+               size_t filled;
+               u32 val;        /* scratch buffer sized for the widest access */
+
+               /* Use the widest access that is aligned and still fits. */
+               if (count >= 4 && !(*ppos % 4))
+                       filled = 4;
+               else if (count >= 2 && !(*ppos % 2))
+                       filled = 2;
+               else
+                       filled = 1;
+
+               ret = parent->ops->read(mdev, (char *)&val, filled, *ppos);
+               if (ret < 0)
+                       return ret;     /* keep the vendor's error code */
+               if (!ret)
+                       return -EFAULT;
+
+               if (copy_to_user(buf, &val, filled))
+                       return -EFAULT;
+
+               count -= filled;
+               done += filled;
+               *ppos += filled;
+               buf += filled;
+       }
+
+       return done;
+}
+
+/*
+ * vfio_mdev_write - write to the device through the vendor driver
+ * @device_data: struct vfio_mdev for this device
+ * @buf: userspace source buffer
+ * @count: number of bytes to write
+ * @ppos: file position; advanced by the number of bytes transferred
+ *
+ * The request is split into naturally aligned 4-, 2- or 1-byte accesses,
+ * each copied in from userspace and forwarded to parent->ops->write.
+ *
+ * Returns the number of bytes written, -EINVAL if the vendor driver has no
+ * write callback, the vendor callback's negative error code, or -EFAULT if a
+ * user copy fails or the callback reports zero bytes.
+ */
+static ssize_t vfio_mdev_write(void *device_data, const char __user *buf,
+                              size_t count, loff_t *ppos)
+{
+       struct vfio_mdev *vmdev = device_data;
+       struct mdev_device *mdev = vmdev->mdev;
+       struct parent_device *parent = mdev->parent;
+       ssize_t done = 0;
+       int ret;
+
+       if (!parent->ops->write)
+               return -EINVAL;
+
+       while (count) {
+               size_t filled;
+               u32 val;        /* scratch buffer sized for the widest access */
+
+               /* Use the widest access that is aligned and still fits. */
+               if (count >= 4 && !(*ppos % 4))
+                       filled = 4;
+               else if (count >= 2 && !(*ppos % 2))
+                       filled = 2;
+               else
+                       filled = 1;
+
+               if (copy_from_user(&val, buf, filled))
+                       return -EFAULT;
+
+               ret = parent->ops->write(mdev, (char *)&val, filled, *ppos);
+               if (ret < 0)
+                       return ret;     /* keep the vendor's error code */
+               if (!ret)
+                       return -EFAULT;
+
+               count -= filled;
+               done += filled;
+               *ppos += filled;
+               buf += filled;
+       }
+
+       return done;
+}
+
+/*
+ * mmap callback: hand the request to the vendor driver's mmap handler.
+ * Returns the handler's result, or -EINVAL when the vendor driver does not
+ * provide one.
+ */
+static int vfio_mdev_mmap(void *device_data, struct vm_area_struct *vma)
+{
+       struct vfio_mdev *vmdev = device_data;
+       struct mdev_device *mdev = vmdev->mdev;
+
+       if (!mdev->parent->ops->mmap)
+               return -EINVAL;
+
+       return mdev->parent->ops->mmap(mdev, vma);
+}
+
+/* VFIO device callbacks passed to vfio_add_group_dev() for every mdev. */
+static const struct vfio_device_ops vfio_mdev_dev_ops = {
+       .name           = "vfio-mdev",
+       .open           = vfio_mdev_open,
+       .release        = vfio_mdev_close,
+       .ioctl          = vfio_mdev_unlocked_ioctl,
+       .read           = vfio_mdev_read,
+       .write          = vfio_mdev_write,
+       .mmap           = vfio_mdev_mmap,
+};
+
+/*
+ * vfio_mdev_probe - bind a mediated device to the VFIO core
+ * @dev: struct device embedded in the mediated device
+ *
+ * Allocates the per-device struct vfio_mdev and registers it with the VFIO
+ * core together with vfio_mdev_dev_ops.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
+ * vfio_add_group_dev().
+ */
+int vfio_mdev_probe(struct device *dev)
+{
+       struct vfio_mdev *vmdev;
+       struct mdev_device *mdev = to_mdev_device(dev);
+       int ret;
+
+       /* kzalloc() reports failure with NULL, never with an ERR_PTR. */
+       vmdev = kzalloc(sizeof(*vmdev), GFP_KERNEL);
+       if (!vmdev)
+               return -ENOMEM;
+
+       vmdev->mdev = mdev_get_device(mdev);
+       vmdev->group = mdev->group;
+
+       ret = vfio_add_group_dev(dev, &vfio_mdev_dev_ops, vmdev);
+       if (ret)
+               kfree(vmdev);
+
+       /*
+        * NOTE(review): the reference is dropped here even on success while
+        * vmdev->mdev keeps the pointer; presumably the mdev core keeps the
+        * device alive while the driver is bound - confirm.
+        */
+       mdev_put_device(mdev);
+       return ret;
+}
+
+/*
+ * Unbind callback: detach @dev from the VFIO core and free the private data
+ * pointer that vfio_del_group_dev() hands back.
+ */
+void vfio_mdev_remove(struct device *dev)
+{
+       struct vfio_mdev *vmdev;
+
+       vmdev = vfio_del_group_dev(dev);
+       kfree(vmdev);
+}
+
+/* Driver descriptor registered with the mdev core at module init. */
+struct mdev_driver vfio_mdev_driver = {
+       .name   = "vfio_mdev",
+       .probe  = vfio_mdev_probe,
+       .remove = vfio_mdev_remove,
+};
+
+/* Module entry point: register vfio_mdev_driver and return the result. */
+static int __init vfio_mdev_init(void)
+{
+       return mdev_register_driver(&vfio_mdev_driver, THIS_MODULE);
+}
+
+/* Module exit point: unregister vfio_mdev_driver. */
+static void __exit vfio_mdev_exit(void)
+{
+       mdev_unregister_driver(&vfio_mdev_driver);
+}
+
+module_init(vfio_mdev_init)
+module_exit(vfio_mdev_exit)
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 016c14a1b454..776cc2b063d4 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -21,9 +21,9 @@
 
 #define VFIO_PCI_OFFSET_SHIFT   40
 
-#define VFIO_PCI_OFFSET_TO_INDEX(off)  (off >> VFIO_PCI_OFFSET_SHIFT)
-#define VFIO_PCI_INDEX_TO_OFFSET(index)        ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
-#define VFIO_PCI_OFFSET_MASK   (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
+#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
 
 /* Special capability IDs predefined access */
 #define PCI_CAP_ID_INVALID             0xFF    /* default raw access */
-- 
2.7.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]