qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH for 2.8 11/11] vhost_net: device IOTLB support


From: Jason Wang
Subject: Re: [Qemu-devel] [PATCH for 2.8 11/11] vhost_net: device IOTLB support
Date: Thu, 1 Sep 2016 15:36:40 +0800
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.2.0



On 2016年09月01日 11:34, Peter Xu wrote:
On Tue, Aug 30, 2016 at 11:06:59AM +0800, Jason Wang wrote:
This patches implements Device IOTLB support for vhost kernel. This is
done through:

1) switch to use dma helpers when map/unmap vrings from vhost codes
2) kernel support for Device IOTLB API:

- allow vhost-net to query the IOMMU IOTLB entry through eventfd
- enable the ability for qemu to update a specified mapping of vhost
  through ioctl.
- enable the ability to invalidate a specified range of iova for the
   device IOTLB of vhost through ioctl. In x86/intel_iommu case this is
   triggered through iommu memory region notifier from device IOTLB
   invalidation descriptor processing routine.

With all the above, kernel vhost_net can co-operate with IOMMU.

Cc: Michael S. Tsirkin <address@hidden>
Signed-off-by: Jason Wang <address@hidden>
---
  hw/virtio/vhost-backend.c         | 104 ++++++++++++++++++++++++++
  hw/virtio/vhost.c                 | 149 ++++++++++++++++++++++++++++++++------
  include/hw/virtio/vhost-backend.h |  14 ++++
  include/hw/virtio/vhost.h         |   4 +
  include/hw/virtio/virtio-access.h |  44 ++++++++++-
  net/tap.c                         |   1 +
  6 files changed, 291 insertions(+), 25 deletions(-)

[...]

+
+void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write)
+{
+    IOMMUTLBEntry iotlb;
+    uint64_t uaddr, len;
+
+    rcu_read_lock();
+
+    iotlb = address_space_get_iotlb_entry(virtio_get_dma_as(dev->vdev),
+                                          iova, write);
+    if (iotlb.target_as != NULL) {
+        if (vhost_memory_region_lookup(dev, iotlb.translated_addr,
+                                       &uaddr, &len)) {
+            error_report("Fail to lookup the translated address "
+                         "%"PRIx64, iotlb.translated_addr);
+            goto out;
+        }
+
+        len = MIN(iotlb.addr_mask + 1, len);
+        iova = iova & ~iotlb.addr_mask;
+
+        if (dev->vhost_ops->vhost_update_device_iotlb(dev, iova, uaddr,
+                                                      len, iotlb.perm)) {
+            error_report("Fail to update device iotlb");
+            goto out;
+        }
+    }
Question: when will target_as == NULL? Do we need an assertion here if
it should never happen?

Good catch; it looks like we need to check perm against IOMMU_NONE here for an invalid translation, instead of checking target_as.

+out:
+    rcu_read_unlock();
+}
+


+    hdev->vhost_ops->vhost_set_iotlb_callback(hdev, true);
+
+    if (mr_has_iommu_ops(virtio_get_dma_as(vdev)->root)) {
+        /* Update used ring information for IOTLB to work correctly */
+        for (i = 0; i < hdev->nvqs; ++i) {
+            struct vhost_virtqueue *vq = hdev->vqs + i;
+            vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+        }
+    }
      return 0;
+#if 0
+fail_iotlb:
+    if (hdev->vhost_ops->vhost_set_vring_enable) {
+        hdev->vhost_ops->vhost_set_vring_enable(hdev, 0);
+    }
+#endif
Maybe we can remove these lines if they are not going to be used.

Yes.


  fail_log:
      vhost_log_put(hdev, false);
  fail_vq:
@@ -1345,6 +1439,7 @@ fail_vq:
                               hdev->vq_index + i);
      }
      i = hdev->nvqs;
+
Nit: A newline without context change.

Will remove this in next version.


  fail_mem:
  fail_features:
@@ -1359,6 +1454,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev) /* should only be called after backend is connected */
      assert(hdev->vhost_ops);
+    hdev->vhost_ops->vhost_set_iotlb_callback(hdev, false);
for (i = 0; i < hdev->nvqs; ++i) {
          vhost_virtqueue_stop(hdev,
@@ -1367,8 +1463,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice 
*vdev)
                               hdev->vq_index + i);
      }
+ if (mr_has_iommu_ops(virtio_get_dma_as(vdev)->root)) {
+        memory_region_unregister_iommu_notifier(virtio_get_dma_as(vdev)->root,
+                                                &hdev->n);
+    }
      vhost_log_put(hdev, true);
      hdev->started = false;
+    hdev->vdev = NULL;
  }
int vhost_net_set_backend(struct vhost_dev *hdev,
diff --git a/include/hw/virtio/vhost-backend.h 
b/include/hw/virtio/vhost-backend.h
index cf7f0b5..5cf8c70 100644
--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
@@ -8,9 +8,12 @@
   *
   */
+
Another nit for newline.

Will remove this in next version.


  #ifndef VHOST_BACKEND_H
  #define VHOST_BACKEND_H
+#include "exec/memory.h"
+
  typedef enum VhostBackendType {
      VHOST_BACKEND_TYPE_NONE = 0,
      VHOST_BACKEND_TYPE_KERNEL = 1,
@@ -73,6 +76,14 @@ typedef int (*vhost_migration_done_op)(struct vhost_dev *dev,
  typedef bool (*vhost_backend_can_merge_op)(struct vhost_dev *dev,
                                             uint64_t start1, uint64_t size1,
                                             uint64_t start2, uint64_t size2);
+typedef void (*vhost_set_iotlb_callback_op)(struct vhost_dev *dev,
+                                           int enabled);
+typedef int (*vhost_update_device_iotlb_op)(struct vhost_dev *dev,
+                                            uint64_t iova, uint64_t uaddr,
+                                            uint64_t len,
+                                            IOMMUAccessFlags perm);
+typedef int (*vhost_invalidate_device_iotlb_op)(struct vhost_dev *dev,
+                                                uint64_t iova, uint64_t len);
typedef struct VhostOps {
      VhostBackendType backend_type;
@@ -102,6 +113,9 @@ typedef struct VhostOps {
      vhost_requires_shm_log_op vhost_requires_shm_log;
      vhost_migration_done_op vhost_migration_done;
      vhost_backend_can_merge_op vhost_backend_can_merge;
+    vhost_set_iotlb_callback_op vhost_set_iotlb_callback;
+    vhost_update_device_iotlb_op vhost_update_device_iotlb;
+    vhost_invalidate_device_iotlb_op vhost_invalidate_device_iotlb;
  } VhostOps;
extern const VhostOps user_ops;
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index e433089..b971fa1 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -20,6 +20,7 @@ struct vhost_virtqueue {
      unsigned long long ring_phys;
      unsigned ring_size;
      EventNotifier masked_notifier;
+    struct vhost_dev *dev;
  };
typedef unsigned long vhost_log_chunk_t;
@@ -37,6 +38,7 @@ struct vhost_log {
struct vhost_memory;
  struct vhost_dev {
+    VirtIODevice *vdev;
      MemoryListener memory_listener;
      struct vhost_memory *mem;
      int n_mem_sections;
@@ -61,6 +63,7 @@ struct vhost_dev {
      void *opaque;
      struct vhost_log *log;
      QLIST_ENTRY(vhost_dev) entry;
+    Notifier n;
  };
int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
@@ -90,4 +93,5 @@ bool vhost_has_free_slot(void);
  int vhost_net_set_backend(struct vhost_dev *hdev,
                            struct vhost_vring_file *file);
+void vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write);
  #endif
diff --git a/include/hw/virtio/virtio-access.h 
b/include/hw/virtio/virtio-access.h
index 4071dad..3560bba 100644
--- a/include/hw/virtio/virtio-access.h
+++ b/include/hw/virtio/virtio-access.h
@@ -18,6 +18,7 @@
#include "hw/virtio/virtio.h"
  #include "hw/virtio/virtio-bus.h"
+#include "sysemu/dma.h"
  #include "exec/address-spaces.h"
#if defined(TARGET_PPC64) || defined(TARGET_ARM)
@@ -200,4 +201,45 @@ static inline void virtio_tswap64s(VirtIODevice *vdev, 
uint64_t *s)
  {
      *s = virtio_tswap64(vdev, *s);
  }
-#endif /* QEMU_VIRTIO_ACCESS_H */
+
+static inline bool mr_has_iommu_ops(MemoryRegion *mr)
+{
+    if (mr->alias) {
+        return mr_has_iommu_ops(mr->alias);
+    }
+
+    if (mr->iommu_ops)
+        return true;
+    else
+        return false;
+}
Shall we just enhance memory_region_is_iommu() with alias handling,
then call memory_region_is_iommu() directly?

Good idea, will do this in next version.


+
+static inline void *virtio_memory_map(VirtIODevice *vdev, hwaddr addr,
+                                      hwaddr *plen, int is_write)
+{
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
+    if (!mr_has_iommu_ops(dma_as->root)) {
+      return dma_memory_map(dma_as, addr, plen, is_write ?
          ^^ indents :)


Will fix this :)

+                            DMA_DIRECTION_FROM_DEVICE :
+                            DMA_DIRECTION_TO_DEVICE);
+    } else {
+      return (void *)addr;
          ^^ and here

And this.

+    }
+}
+
+
+static inline void virtio_memory_unmap(VirtIODevice *vdev, void *buffer,
+                                       hwaddr len, int is_write,
+                                       hwaddr access_len)
+{
+    AddressSpace *dma_as = virtio_get_dma_as(vdev);
+
+    if (!mr_has_iommu_ops(dma_as->root)) {
+      dma_memory_unmap(dma_as, buffer, len, is_write ?
          ^^ and here

And... One general question on the vhost fd used to communicate
between QEMU and vhost: I see that data is coming from both directions
on this fd, would this be a problem?

Looks not :)


For example, invalidations are triggered by the guest kernel writing to
IOMMU IQ registers (which is a vcpu thread); this may trigger a write()
to the vhost fd while the QEMU event loop is reading it. How are we
handling possible concurrent operations on this same fd? Please just
point out if I missed anything. :)

The synchronization is done in the kernel:

- message dequeue and enqueue are protected by a spinlock
- the data path and control path (e.g. IOTLB updating) are synchronized through a mutex

Thanks


Thanks,

-- peterx




reply via email to

[Prev in Thread] Current Thread [Next in Thread]