qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH 3/5] VFIO: Base framework for new VFIO driver


From: Alex Williamson
Subject: Re: [Qemu-devel] [RFC PATCH 3/5] VFIO: Base framework for new VFIO driver
Date: Mon, 19 Sep 2011 10:42:56 -0600

Sorry for the delay, just getting back from LPC and some time off...

On Wed, 2011-09-07 at 10:52 -0400, Konrad Rzeszutek Wilk wrote:
> > +static long vfio_iommu_unl_ioctl(struct file *filep,
> > +                            unsigned int cmd, unsigned long arg)
> > +{
> > +   struct vfio_iommu *viommu = filep->private_data;
> > +   struct vfio_dma_map dm;
> > +   int ret = -ENOSYS;
> > +
> > +   switch (cmd) {
> > +   case VFIO_IOMMU_MAP_DMA:
> > +           if (copy_from_user(&dm, (void __user *)arg, sizeof dm))
> > +                   return -EFAULT;
> > +           ret = 0; // XXX - Do something
> 
> <chuckles>

Truly an RFC ;)

> > +           if (!ret && copy_to_user((void __user *)arg, &dm, sizeof dm))
> > +                   ret = -EFAULT;
> > +           break;
> > +
> > +   case VFIO_IOMMU_UNMAP_DMA:
> > +           if (copy_from_user(&dm, (void __user *)arg, sizeof dm))
> > +                   return -EFAULT;
> > +           ret = 0; // XXX - Do something
> > +           if (!ret && copy_to_user((void __user *)arg, &dm, sizeof dm))
> > +                   ret = -EFAULT;
> > +           break;
> > +   }
> > +   return ret;
> > +}
> > +
> > +#ifdef CONFIG_COMPAT
> > +static long vfio_iommu_compat_ioctl(struct file *filep,
> > +                               unsigned int cmd, unsigned long arg)
> > +{
> > +   arg = (unsigned long)compat_ptr(arg);
> > +   return vfio_iommu_unl_ioctl(filep, cmd, arg);
> > +}
> > +#endif     /* CONFIG_COMPAT */
> > +
> > +const struct file_operations vfio_iommu_fops = {
> > +   .owner          = THIS_MODULE,
> > +   .release        = vfio_iommu_release,
> > +   .unlocked_ioctl = vfio_iommu_unl_ioctl,
> > +#ifdef CONFIG_COMPAT
> > +   .compat_ioctl   = vfio_iommu_compat_ioctl,
> > +#endif
> > +};
> > diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
> .. snip..
> > +int vfio_group_add_dev(struct device *dev, void *data)
> > +{
> > +   struct vfio_device_ops *ops = data;
> > +   struct list_head *pos;
> > +   struct vfio_group *vgroup = NULL;
> > +   struct vfio_device *vdev = NULL;
> > +   unsigned int group;
> > +   int ret = 0, new_group = 0;
> 
> 'new_group' should probably be 'bool'.

ok

> > +
> > +   if (iommu_device_group(dev, &group))
> > +           return 0;
> 
> -EEXIST?

I think I made this return 0 because it's called from device add
notifiers and walking devices lists.  It's ok for it to fail, not all
devices have to be backed by an iommu, they just won't show up in vfio.
Maybe I should leave that to the leaf callers though.  EINVAL is
probably more appropriate.

> > +
> > +   mutex_lock(&vfio.group_lock);
> > +
> > +   list_for_each(pos, &vfio.group_list) {
> > +           vgroup = list_entry(pos, struct vfio_group, next);
> > +           if (vgroup->group == group)
> > +                   break;
> > +           vgroup = NULL;
> > +   }
> > +
> > +   if (!vgroup) {
> > +           int id;
> > +
> > +           if (unlikely(idr_pre_get(&vfio.idr, GFP_KERNEL) == 0)) {
> > +                   ret = -ENOMEM;
> > +                   goto out;
> > +           }
> > +           vgroup = kzalloc(sizeof(*vgroup), GFP_KERNEL);
> > +           if (!vgroup) {
> > +                   ret = -ENOMEM;
> > +                   goto out;
> > +           }
> > +
> > +           vgroup->group = group;
> > +           INIT_LIST_HEAD(&vgroup->device_list);
> > +
> > +           ret = idr_get_new(&vfio.idr, vgroup, &id);
> > +           if (ret == 0 && id > MINORMASK) {
> > +                   idr_remove(&vfio.idr, id);
> > +                   kfree(vgroup);
> > +                   ret = -ENOSPC;
> > +                   goto out;
> > +           }
> > +
> > +           vgroup->devt = MKDEV(MAJOR(vfio.devt), id);
> > +           list_add(&vgroup->next, &vfio.group_list);
> > +           device_create(vfio.class, NULL, vgroup->devt,
> > +                         vgroup, "%u", group);
> > +
> > +           new_group = 1;
> > +   } else {
> > +           list_for_each(pos, &vgroup->device_list) {
> > +                   vdev = list_entry(pos, struct vfio_device, next);
> > +                   if (vdev->dev == dev)
> > +                           break;
> > +                   vdev = NULL;
> > +           }
> > +   }
> > +
> > +   if (!vdev) {
> > +           /* Adding a device for a group that's already in use? */
> > +           /* Maybe we should attach to the domain so others can't */
> > +           BUG_ON(vgroup->container &&
> > +                  vgroup->container->iommu &&
> > +                  vgroup->container->iommu->refcnt);
> > +
> > +           vdev = ops->new(dev);
> > +           if (IS_ERR(vdev)) {
> > +                   /* If we just created this vgroup, tear it down */
> > +                   if (new_group) {
> > +                           device_destroy(vfio.class, vgroup->devt);
> > +                           idr_remove(&vfio.idr, MINOR(vgroup->devt));
> > +                           list_del(&vgroup->next);
> > +                           kfree(vgroup);
> > +                   }
> > +                   ret = PTR_ERR(vdev);
> > +                   goto out;
> > +           }
> > +           list_add(&vdev->next, &vgroup->device_list);
> > +           vdev->dev = dev;
> > +           vdev->ops = ops;
> > +           vdev->vfio = &vfio;
> > +   }
> > +out:
> > +   mutex_unlock(&vfio.group_lock);
> > +   return ret;
> > +}
> > +
> > +void vfio_group_del_dev(struct device *dev)
> > +{
> > +   struct list_head *pos;
> > +   struct vfio_container *vcontainer;
> > +   struct vfio_group *vgroup = NULL;
> > +   struct vfio_device *vdev = NULL;
> > +   unsigned int group;
> > +
> > +   if (iommu_device_group(dev, &group))
> > +           return;
> > +
> > +   mutex_lock(&vfio.group_lock);
> > +
> > +   list_for_each(pos, &vfio.group_list) {
> > +           vgroup = list_entry(pos, struct vfio_group, next);
> > +           if (vgroup->group == group)
> > +                   break;
> > +           vgroup = NULL;
> > +   }
> > +
> > +   if (!vgroup)
> > +           goto out;
> > +
> > +   vcontainer = vgroup->container;
> > +
> > +   list_for_each(pos, &vgroup->device_list) {
> > +           vdev = list_entry(pos, struct vfio_device, next);
> > +           if (vdev->dev == dev)
> > +                   break;
> > +           vdev = NULL;
> > +   }
> > +
> > +   if (!vdev)
> > +           goto out;
> > +
> > +   /* XXX Did a device we're using go away? */
> > +   BUG_ON(vdev->refcnt);
> > +
> > +   if (vcontainer && vcontainer->iommu) {
> > +           iommu_detach_device(vcontainer->iommu->domain, vdev->dev);
> > +           vfio_container_reset_read(vcontainer);
> > +   }
> > +
> > +   list_del(&vdev->next);
> > +   vdev->ops->free(vdev);
> > +
> > +   if (list_empty(&vgroup->device_list) && vgroup->refcnt == 0) {
> > +           device_destroy(vfio.class, vgroup->devt);
> > +           idr_remove(&vfio.idr, MINOR(vgroup->devt));
> > +           list_del(&vgroup->next);
> > +           kfree(vgroup);
> > +   }
> > +out:
> > +   mutex_unlock(&vfio.group_lock);
> > +}
> > +
> > +static int __vfio_group_viable(struct vfio_container *vcontainer)
> 
> Just return 'bool'

Sure

> > +{
> > +   struct list_head *gpos, *dpos;
> > +
> > +   list_for_each(gpos, &vfio.group_list) {
> > +           struct vfio_group *vgroup;
> > +           vgroup = list_entry(gpos, struct vfio_group, next);
> > +           if (vgroup->container != vcontainer)
> > +                   continue;
> > +
> > +           list_for_each(dpos, &vgroup->device_list) {
> > +                   struct vfio_device *vdev;
> > +                   vdev = list_entry(dpos, struct vfio_device, next);
> > +
> > +                   if (!vdev->dev->driver ||
> > +                       vdev->dev->driver->owner != THIS_MODULE)
> > +                           return 0;
> > +           }
> > +   }
> > +   return 1;
> > +}
> > +
> > +static int __vfio_close_iommu(struct vfio_container *vcontainer)
> > +{
> > +   struct list_head *gpos, *dpos;
> > +   struct vfio_iommu *viommu = vcontainer->iommu;
> > +   struct vfio_group *vgroup;
> > +   struct vfio_device *vdev;
> > +
> > +   if (!viommu)
> > +           return 0;
> > +
> > +   if (viommu->refcnt)
> > +           return -EBUSY;
> > +
> > +   list_for_each(gpos, &vfio.group_list) {
> > +           vgroup = list_entry(gpos, struct vfio_group, next);
> > +           if (vgroup->container != vcontainer)
> > +                   continue;
> > +
> > +           list_for_each(dpos, &vgroup->device_list) {
> > +                   vdev = list_entry(dpos, struct vfio_device, next);
> > +                   iommu_detach_device(viommu->domain, vdev->dev);
> > +                   vdev->iommu = NULL;
> > +           }
> > +   }
> > +   iommu_domain_free(viommu->domain);
> > +   kfree(viommu);
> > +   vcontainer->iommu = NULL;
> > +   return 0;
> > +}
> > +
> > +static int __vfio_open_iommu(struct vfio_container *vcontainer)
> > +{
> > +   struct list_head *gpos, *dpos;
> > +   struct vfio_iommu *viommu;
> > +   struct vfio_group *vgroup;
> > +   struct vfio_device *vdev;
> > +
> > +   if (!__vfio_group_viable(vcontainer))
> > +           return -EBUSY;
> > +
> > +   viommu = kzalloc(sizeof(*viommu), GFP_KERNEL);
> > +   if (!viommu)
> > +           return -ENOMEM;
> > +
> > +   viommu->domain = iommu_domain_alloc();
> > +   if (!viommu->domain) {
> > +           kfree(viommu);
> > +           return -EFAULT;
> > +   }
> > +
> > +   viommu->vfio = &vfio;
> > +   vcontainer->iommu = viommu;
> > +
> 
> No need for
>   mutex_lock(&vfio.group_lock);
> 
> Ah, you already hold the lock when using this function.

Right, just really simple, broad locking right now.

> > +   list_for_each(gpos, &vfio.group_list) {
> > +           vgroup = list_entry(gpos, struct vfio_group, next);
> > +           if (vgroup->container != vcontainer)
> > +                   continue;
> > +
> > +           list_for_each(dpos, &vgroup->device_list) {
> > +                   int ret;
> > +
> > +                   vdev = list_entry(dpos, struct vfio_device, next);
> > +
> > +                   ret = iommu_attach_device(viommu->domain, vdev->dev);
> > +                   if (ret) {
> > +                           __vfio_close_iommu(vcontainer);
> > +                           return ret;
> > +                   }
> > +                   vdev->iommu = viommu;
> > +           }
> > +   }
> > +
> > +   if (!allow_unsafe_intrs &&
> > +       !iommu_domain_has_cap(viommu->domain, IOMMU_CAP_INTR_REMAP)) {
> > +           __vfio_close_iommu(vcontainer);
> > +           return -EFAULT;
> > +   }
> > +
> > +   return 0;
> > +}
> > +
> > +static int vfio_group_merge(struct vfio_group *vgroup, int fd)
> > +{
> > +   struct vfio_group *vgroup2;
> > +   struct iommu_domain *domain;
> > +   struct list_head *pos;
> > +   struct file *file;
> > +   int ret = 0;
> > +
> > +   mutex_lock(&vfio.group_lock);
> > +
> > +   file = fget(fd);
> > +   if (!file) {
> > +           ret = -EBADF;
> > +           goto out_noput;
> > +   }
> > +   if (file->f_op != &vfio_group_fops) {
> > +           ret = -EINVAL;
> > +           goto out;
> > +   }
> > +
> > +   vgroup2 = file->private_data;
> > +   if (!vgroup2 || vgroup2 == vgroup || vgroup2->mm != vgroup->mm ||
> > +       (vgroup2->container->iommu && vgroup2->container->iommu->refcnt)) {
> > +           ret = -EINVAL;
> > +           goto out;
> > +   }
> > +
> > +   if (!vgroup->container->iommu) {
> > +           ret = __vfio_open_iommu(vgroup->container);
> > +           if (ret)
> > +                   goto out;
> > +   }
> > +
> > +   if (!vgroup2->container->iommu) {
> > +           ret = __vfio_open_iommu(vgroup2->container);
> > +           if (ret)
> > +                   goto out;
> > +   }
> > +
> > +   if (iommu_domain_has_cap(vgroup->container->iommu->domain,
> > +                            IOMMU_CAP_CACHE_COHERENCY) !=
> > +       iommu_domain_has_cap(vgroup2->container->iommu->domain,
> > +                            IOMMU_CAP_CACHE_COHERENCY)) {
> > +           ret = -EINVAL;
> > +           goto out;
> > +   }
> > +
> > +   ret = __vfio_close_iommu(vgroup2->container);
> > +   if (ret)
> > +           goto out;
> > +
> > +   domain = vgroup->container->iommu->domain;
> > +
> > +   list_for_each(pos, &vgroup2->device_list) {
> > +           struct vfio_device *vdev;
> > +
> > +           vdev = list_entry(pos, struct vfio_device, next);
> > +
> > +           ret = iommu_attach_device(domain, vdev->dev);
> > +           if (ret) {
> > +                   list_for_each(pos, &vgroup2->device_list) {
> > +                           struct vfio_device *vdev2;
> > +
> > +                           vdev2 = list_entry(pos,
> > +                                              struct vfio_device, next);
> > +                           if (vdev2 == vdev)
> > +                                   break;
> > +
> > +                           iommu_detach_device(domain, vdev2->dev);
> > +                           vdev2->iommu = NULL;
> > +                   }
> > +                   goto out;
> > +           }
> > +           vdev->iommu = vgroup->container->iommu;
> > +   }
> > +
> > +   kfree(vgroup2->container->read_buf);
> > +   kfree(vgroup2->container);
> > +
> > +   vgroup2->container = vgroup->container;
> > +   vgroup->container->refcnt++;
> > +   vfio_container_reset_read(vgroup->container);
> > +
> > +out:
> > +   fput(file);
> > +out_noput:
> > +   mutex_unlock(&vfio.group_lock);
> > +   return ret;
> > +}
> > +
> > +static int vfio_group_unmerge(struct vfio_group *vgroup, int fd)
> > +{
> > +   struct vfio_group *vgroup2;
> > +   struct vfio_container *vcontainer2;
> > +   struct vfio_device *vdev;
> > +   struct list_head *pos;
> > +   struct file *file;
> > +   int ret = 0;
> > +
> > +   vcontainer2 = kzalloc(sizeof(*vcontainer2), GFP_KERNEL);
> > +   if (!vcontainer2)
> > +           return -ENOMEM;
> > +
> > +   mutex_lock(&vfio.group_lock);
> > +
> > +   file = fget(fd);
> > +   if (!file) {
> > +           ret = -EBADF;
> > +           goto out_noput;
> > +   }
> > +   if (file->f_op != &vfio_group_fops) {
> 
> Hm, I think scripts/checkpatch.pl will not like that, but as
> you said - it is RFC.

Will check

Thanks for the review!

Alex





reply via email to

[Prev in Thread] Current Thread [Next in Thread]