[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
RE: [PATCH v5 03/20] vfio/iommufd: Implement the iommufd backend
|
From: |
Duan, Zhenzhong |
|
Subject: |
RE: [PATCH v5 03/20] vfio/iommufd: Implement the iommufd backend |
|
Date: |
Fri, 10 Nov 2023 10:18:59 +0000 |
>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Sent: Friday, November 10, 2023 5:34 PM
>Subject: Re: [PATCH v5 03/20] vfio/iommufd: Implement the iommufd backend
>
>On 11/9/23 12:45, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> Add the iommufd backend. The IOMMUFD container class is implemented
>> based on the new /dev/iommu user API. This backend obviously depends
>> on CONFIG_IOMMUFD.
>>
>> So far, the iommufd backend doesn't support dirty page sync yet due
>> to missing support in the host kernel.
>>
>> Co-authored-by: Eric Auger <eric.auger@redhat.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> v5: Switch to IOAS attach/detach and hide hwpt
>>
>> include/hw/vfio/vfio-common.h | 11 +
>> hw/vfio/common.c | 20 +-
>> hw/vfio/iommufd.c | 429 ++++++++++++++++++++++++++++++++++
>> hw/vfio/meson.build | 3 +
>> hw/vfio/trace-events | 10 +
>> 5 files changed, 469 insertions(+), 4 deletions(-)
>> create mode 100644 hw/vfio/iommufd.c
>>
>> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
>> index 24ecc0e7ee..3dac5c167e 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow {
>> QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
>> } VFIOHostDMAWindow;
>>
>> +typedef struct IOMMUFDBackend IOMMUFDBackend;
>> +
>> +typedef struct VFIOIOMMUFDContainer {
>> + VFIOContainerBase bcontainer;
>> + IOMMUFDBackend *be;
>> + uint32_t ioas_id;
>> +} VFIOIOMMUFDContainer;
>> +
>> typedef struct VFIODeviceOps VFIODeviceOps;
>>
>> typedef struct VFIODevice {
>> @@ -116,6 +124,8 @@ typedef struct VFIODevice {
>> OnOffAuto pre_copy_dirty_page_tracking;
>> bool dirty_pages_supported;
>> bool dirty_tracking;
>> + int devid;
>> + IOMMUFDBackend *iommufd;
>> } VFIODevice;
>>
>> struct VFIODeviceOps {
>> @@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice)
>VFIODeviceList;
>> extern VFIOGroupList vfio_group_list;
>> extern VFIODeviceList vfio_device_list;
>> extern const VFIOIOMMUOps vfio_legacy_ops;
>> +extern const VFIOIOMMUOps vfio_iommufd_ops;
>> extern const MemoryListener vfio_memory_listener;
>> extern int vfio_kvm_device_fd;
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 572ae7c934..3b7e11158f 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -19,6 +19,7 @@
>> */
>>
>> #include "qemu/osdep.h"
>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>> #include <sys/ioctl.h>
>> #ifdef CONFIG_KVM
>> #include <linux/kvm.h>
>> @@ -1462,10 +1463,13 @@ VFIOAddressSpace
>*vfio_get_address_space(AddressSpace *as)
>>
>> void vfio_put_address_space(VFIOAddressSpace *space)
>> {
>> - if (QLIST_EMPTY(&space->containers)) {
>> - QLIST_REMOVE(space, list);
>> - g_free(space);
>> + if (!QLIST_EMPTY(&space->containers)) {
>> + return;
>
>I think this change deserves to be in a separate patch, even if simple.
>Is there some relation with iommufd ? This is not clear.
OK, will do. It's unrelated to iommufd, just avoid unnecessary check below.
>
>> }
>> +
>> + QLIST_REMOVE(space, list);
>> + g_free(space);
>> +
>> if (QLIST_EMPTY(&vfio_address_spaces)) {
>> qemu_unregister_reset(vfio_reset_handler, NULL);
>> }
>> @@ -1498,8 +1502,16 @@ retry:
>> int vfio_attach_device(char *name, VFIODevice *vbasedev,
>> AddressSpace *as, Error **errp)
>> {
>> - const VFIOIOMMUOps *ops = &vfio_legacy_ops;
>> + const VFIOIOMMUOps *ops;
>>
>> +#ifdef CONFIG_IOMMUFD
>> + if (vbasedev->iommufd) {
>> + ops = &vfio_iommufd_ops;
>> + } else
>> +#endif
>> + {
>> + ops = &vfio_legacy_ops;
>> + }
>
>Simply adding :
>
> +#ifdef CONFIG_IOMMUFD
> + if (vbasedev->iommufd) {
> + ops = &vfio_iommufd_ops;
> + }
> +#endif
>
>would have the same effect with less change.
Indeed, will do.
>
>That said, it would also be nice to find a way to avoid the use of
>CONFIG_IOMMUFD in hw/vfio/common.c. May be with a helper returning
>'const VFIOIOMMUOps *'. This is minor. Still, I find some redundancy
>with vfio_container_init() and I don't a good alternative yet :)
Sure, will do, guess you mean a helper function in hw/vfio/helpers.c with
CONFIG_IOMMUFD check?
>
>
>> return ops->attach_device(name, vbasedev, as, errp);
>> }
>>
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> new file mode 100644
>> index 0000000000..ea4e23f4ec
>> --- /dev/null
>> +++ b/hw/vfio/iommufd.c
>> @@ -0,0 +1,429 @@
>> +/*
>> + * iommufd container backend
>> + *
>> + * Copyright (C) 2023 Intel Corporation.
>> + * Copyright Red Hat, Inc. 2023
>> + *
>> + * Authors: Yi Liu <yi.l.liu@intel.com>
>> + * Eric Auger <eric.auger@redhat.com>
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include <sys/ioctl.h>
>> +#include <linux/vfio.h>
>> +#include <linux/iommufd.h>
>> +
>> +#include "hw/vfio/vfio-common.h"
>> +#include "qemu/error-report.h"
>> +#include "trace.h"
>> +#include "qapi/error.h"
>> +#include "sysemu/iommufd.h"
>> +#include "hw/qdev-core.h"
>> +#include "sysemu/reset.h"
>> +#include "qemu/cutils.h"
>> +#include "qemu/chardev_open.h"
>> +
>> +static int iommufd_map(VFIOContainerBase *bcontainer, hwaddr iova,
>> + ram_addr_t size, void *vaddr, bool readonly)
>> +{
>> + VFIOIOMMUFDContainer *container =
>> + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>> +
>> + return iommufd_backend_map_dma(container->be,
>> + container->ioas_id,
>> + iova, size, vaddr, readonly);
>> +}
>> +
>> +static int iommufd_unmap(VFIOContainerBase *bcontainer,
>> + hwaddr iova, ram_addr_t size,
>> + IOMMUTLBEntry *iotlb)
>> +{
>> + VFIOIOMMUFDContainer *container =
>> + container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>> +
>> + /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
>> + return iommufd_backend_unmap_dma(container->be,
>> + container->ioas_id, iova, size);
>> +}
>> +
>> +static void iommufd_cdev_kvm_device_add(VFIODevice *vbasedev)
>> +{
>> + Error *err = NULL;
>> +
>> + if (vfio_kvm_device_add_fd(vbasedev->fd, &err)) {
>> + error_report_err(err);
>
>We should propagate this error to the callers instead.
This is to follow legacy backend where the error doesn't treated as
a serious issue.
>
>> + }
>> +}
>> +
>> +static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
>> +{
>> + Error *err = NULL;
>> +
>> + if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
>> + error_report_err(err);
>
>Possibly this one also but It might be more complex. Let's keep it that
>way.
>
>> + }
>> +}
>> +
>> +static int iommufd_connect_and_bind(VFIODevice *vbasedev, Error **errp)
>> +{
>> + IOMMUFDBackend *iommufd = vbasedev->iommufd;
>> + struct vfio_device_bind_iommufd bind = {
>> + .argsz = sizeof(bind),
>> + .flags = 0,
>> + };
>> + int ret;
>> +
>> + ret = iommufd_backend_connect(iommufd, errp);
>> + if (ret) {
>> + return ret;
>> + }
>> +
>> + /*
>> + * Add device to kvm-vfio to be prepared for the tracking
>> + * in KVM. Especially for some emulated devices, it requires
>> + * to have kvm information in the device open.
>> + */
>> + iommufd_cdev_kvm_device_add(vbasedev);
>
>We shoud return a possible error.
This is to follow legacy backend where this error is reported and ignored.
Do we need a difference for iommufd BE?
>
>> +
>> + /* Bind device to iommufd */
>> + bind.iommufd = iommufd->fd;
>> + ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind);
>> + if (ret) {
>> + error_setg_errno(errp, errno, "error bind device fd=%d to
>> iommufd=%d",
>> + vbasedev->fd, bind.iommufd);
>> + goto err_bind;
>> + }
>> +
>> + vbasedev->devid = bind.out_devid;
>> + trace_iommufd_connect_and_bind(bind.iommufd, vbasedev->name,
>vbasedev->fd,
>> + vbasedev->devid);
>> + return ret;
>> +err_bind:
>> + iommufd_cdev_kvm_device_del(vbasedev);
>> + iommufd_backend_disconnect(iommufd);
>
>These two calls look like iommufd_unbind_and_disconnect() below.
Yes, they are same as iommufd doesn't support explicit device unbind.
But it looks strange to call iommufd_unbind_and_disconnect in
iommufd_connect_and_bind.
>
>> + return ret;
>> +}
>> +
>> +static void iommufd_unbind_and_disconnect(VFIODevice *vbasedev)
>> +{
>> + /* Unbind is automatically conducted when device fd is closed */
>> + iommufd_cdev_kvm_device_del(vbasedev);
>> + iommufd_backend_disconnect(vbasedev->iommufd);
>> +}
>> +
>> +static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
>> +{
>> + long int ret = -ENOTTY;
>> + char *path, *vfio_dev_path = NULL, *vfio_path = NULL;
>> + DIR *dir = NULL;
>> + struct dirent *dent;
>> + gchar *contents;
>> + struct stat st;
>> + gsize length;
>> + int major, minor;
>> + dev_t vfio_devt;
>> +
>> + path = g_strdup_printf("%s/vfio-dev", sysfs_path);
>> + if (stat(path, &st) < 0) {
>> + error_setg_errno(errp, errno, "no such host device");
>> + goto out_free_path;
>> + }
>> +
>> + dir = opendir(path);
>> + if (!dir) {
>> + error_setg_errno(errp, errno, "couldn't open dirrectory %s", path);
>
>
>directory
Good catch, will fix.
Thanks
Zhenzhong
[PATCH v5 04/20] vfio/iommufd: Relax assert check for iommufd backend, Zhenzhong Duan, 2023/11/09
[PATCH v5 05/20] vfio/iommufd: Add support for iova_ranges and pgsizes, Zhenzhong Duan, 2023/11/09
[PATCH v5 06/20] vfio/pci: Extract out a helper vfio_pci_get_pci_hot_reset_info, Zhenzhong Duan, 2023/11/09