qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v1 03/24] vfio-user: add container IO ops vector


From: Cédric Le Goater
Subject: Re: [PATCH v1 03/24] vfio-user: add container IO ops vector
Date: Fri, 9 Dec 2022 17:10:55 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.5.1

Hello John,

On 11/9/22 00:13, John Johnson wrote:
Used for communication with VFIO driver
(prep work for vfio-user, which will communicate over a socket)

Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
---
  hw/vfio/common.c              | 126 ++++++++++++++++++++++++++++--------------
  include/hw/vfio/vfio-common.h |  33 +++++++++++
  2 files changed, 117 insertions(+), 42 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ace9562..83d69b9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
          goto unmap_exit;
      }
- ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+    ret = CONT_DMA_UNMAP(container, unmap, bitmap);

I am not sure these macros are very useful, compared to :

    container->ops->dma_unmap(container, unmap, bitmap);

      if (!ret) {
          cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
                  iotlb->translated_addr, pages);
      } else {
-        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
+        error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %s", strerror(-ret));
      }
g_free(bitmap->data);
@@ -465,30 +465,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
          return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
      }
- while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
-        /*
-         * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
-         * v4.15) where an overflow in its wrap-around check prevents us from
-         * unmapping the last page of the address space.  Test for the error
-         * condition and re-try the unmap excluding the last page.  The
-         * expectation is that we've never mapped the last page anyway and this
-         * unmap request comes via vIOMMU support which also makes it unlikely
-         * that this page is used.  This bug was introduced well after type1 v2
-         * support was introduced, so we shouldn't need to test for v1.  A fix
-         * is queued for kernel v5.0 so this workaround can be removed once
-         * affected kernels are sufficiently deprecated.
-         */
-        if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
-            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
-            trace_vfio_dma_unmap_overflow_workaround();
-            unmap.size -= 1ULL << ctz64(container->pgsizes);
-            continue;
-        }
-        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
-        return -errno;
-    }
-
-    return 0;
+    return CONT_DMA_UNMAP(container, &unmap, NULL);
  }
static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
@@ -501,24 +478,18 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr 
iova,
          .iova = iova,
          .size = size,
      };
+    int ret;
if (!readonly) {
          map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
      }
- /*
-     * Try the mapping, if it fails with EBUSY, unmap the region and try
-     * again.  This shouldn't be necessary, but we sometimes see it in
-     * the VGA ROM space.
-     */
-    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
-        (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
-         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
-        return 0;
-    }
+    ret = CONT_DMA_MAP(container, &map);
- error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
-    return -errno;
+    if (ret < 0) {
+        error_report("VFIO_MAP_DMA failed: %s", strerror(-ret));
+    }
+    return ret;
  }
static void vfio_host_win_add(VFIOContainer *container,
@@ -1263,10 +1234,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer 
*container, bool start)
          dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
      }
- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
+    ret = CONT_DIRTY_BITMAP(container, &dirty, NULL);
      if (ret) {
          error_report("Failed to set dirty tracking flag 0x%x errno: %d",
-                     dirty.flags, errno);
+                     dirty.flags, -ret);
      }
  }
@@ -1316,11 +1287,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
          goto err_out;
      }
- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+    ret = CONT_DIRTY_BITMAP(container, dbitmap, range);
      if (ret) {
          error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
                  " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
-                (uint64_t)range->size, errno);
+                (uint64_t)range->size, -ret);
          goto err_out;
      }
@@ -2090,6 +2061,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
      container->error = NULL;
      container->dirty_pages_supported = false;
      container->dma_max_mappings = 0;
+    container->io_ops = &vfio_cont_io_ioctl;
      QLIST_INIT(&container->giommu_list);
      QLIST_INIT(&container->hostwin_list);
      QLIST_INIT(&container->vrdl_list);
@@ -2626,3 +2598,73 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
      }
      return vfio_eeh_container_op(container, op);
  }
+
+/*
+ * Traditional ioctl() based io_ops
+ */
+
+static int vfio_io_dma_map(VFIOContainer *container,
+                           struct vfio_iommu_type1_dma_map *map)
+{
+
+    /*
+     * Try the mapping, if it fails with EBUSY, unmap the region and try
+     * again.  This shouldn't be necessary, but we sometimes see it in
+     * the VGA ROM space.
+     */
+    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0 ||
+        (errno == EBUSY &&
+         vfio_dma_unmap(container, map->iova, map->size, NULL) == 0 &&
+         ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0)) {
+        return 0;
+    }
+    return -errno;
+}
+
+static int vfio_io_dma_unmap(VFIOContainer *container,
+                             struct vfio_iommu_type1_dma_unmap *unmap,
+                             struct vfio_bitmap *bitmap)
+{
+
+    while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap)) {
+        /*
+         * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
+         * v4.15) where an overflow in its wrap-around check prevents us from
+         * unmapping the last page of the address space.  Test for the error
+         * condition and re-try the unmap excluding the last page.  The
+         * expectation is that we've never mapped the last page anyway and this
+         * unmap request comes via vIOMMU support which also makes it unlikely
+         * that this page is used.  This bug was introduced well after type1 v2
+         * support was introduced, so we shouldn't need to test for v1.  A fix
+         * is queued for kernel v5.0 so this workaround can be removed once
+         * affected kernels are sufficiently deprecated.
+         */
+        if (errno == EINVAL && unmap->size && !(unmap->iova + unmap->size) &&
+            container->iommu_type == VFIO_TYPE1v2_IOMMU) {
+            trace_vfio_dma_unmap_overflow_workaround();
+            unmap->size -= 1ULL << ctz64(container->pgsizes);
+            continue;
+        }
+        error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
+        return -errno;
+    }
+
+    return 0;
+}
+
+static int vfio_io_dirty_bitmap(VFIOContainer *container,
+                                struct vfio_iommu_type1_dirty_bitmap *bitmap,
+                                struct vfio_iommu_type1_dirty_bitmap_get 
*range)
+{
+    int ret;
+
+    ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, bitmap);
+
+    return ret < 0 ? -errno : ret;
+}
+
+VFIOContIO vfio_cont_io_ioctl = {
+    .dma_map = vfio_io_dma_map,
+    .dma_unmap = vfio_io_dma_unmap,
+    .dirty_bitmap = vfio_io_dirty_bitmap,
+};
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e573f5a..6fd40f1 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
  } VFIOAddressSpace;
struct VFIOGroup;
+typedef struct VFIOContIO VFIOContIO;
typedef struct VFIOContainer {
      VFIOAddressSpace *space;
@@ -83,6 +84,7 @@ typedef struct VFIOContainer {
      MemoryListener prereg_listener;
      unsigned iommu_type;
      Error *error;
+    VFIOContIO *io_ops;

ops should be enough.

      bool initialized;
      bool dirty_pages_supported;
      uint64_t dirty_pgsizes;
@@ -154,6 +156,37 @@ struct VFIODeviceOps {
      int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
  };
+#ifdef CONFIG_LINUX
+
+/*
+ * The next 2 ops vectors are how Devices and Containers
+ * communicate with the server.  The default option is
+ * through ioctl() to the kernel VFIO driver, but vfio-user
+ * can use a socket to a remote process.
+ */
+
+struct VFIOContIO {

VFIOContainerOps seems more adequate with the current VFIO terminology
in QEMU.

Thanks,

C.

+    int (*dma_map)(VFIOContainer *container,
+                   struct vfio_iommu_type1_dma_map *map);
+    int (*dma_unmap)(VFIOContainer *container,
+                     struct vfio_iommu_type1_dma_unmap *unmap,
+                     struct vfio_bitmap *bitmap);
+    int (*dirty_bitmap)(VFIOContainer *container,
+                        struct vfio_iommu_type1_dirty_bitmap *bitmap,
+                        struct vfio_iommu_type1_dirty_bitmap_get *range);
+};
+
+#define CONT_DMA_MAP(cont, map) \
+    ((cont)->io_ops->dma_map((cont), (map)))
+#define CONT_DMA_UNMAP(cont, unmap, bitmap) \
+    ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap)))
+#define CONT_DIRTY_BITMAP(cont, bitmap, range) \
+    ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
+
+extern VFIOContIO vfio_cont_io_ioctl;
+
+#endif /* CONFIG_LINUX */
+
  typedef struct VFIOGroup {
      int fd;
      int groupid;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]