qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC v2 PATCH 2/2] virtio-iommu: vfio integration with


From: Auger Eric
Subject: Re: [Qemu-devel] [RFC v2 PATCH 2/2] virtio-iommu: vfio integration with virtio-iommu
Date: Thu, 17 Aug 2017 17:33:26 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.4.0

Hi Bharat,

On 14/07/2017 09:25, Bharat Bhushan wrote:
> This patch allows virtio-iommu protection for PCI
> device-passthrough.
> 
> MSI region is mapped by current version of virtio-iommu driver.
> This MSI region mapping in not getting pushed on hw iommu
> vfio_get_vaddr() allows only ram-region.
Why is it an issue. As far as I understand this is not needed actually
as the guest MSI doorbell is not used by the host.
 This RFC patch needed
> to be improved.
> 
> Signed-off-by: Bharat Bhushan <address@hidden>
> ---
> v1-v2:
>   - Added trace events
> 
>  hw/virtio/trace-events           |   5 ++
>  hw/virtio/virtio-iommu.c         | 133 
> +++++++++++++++++++++++++++++++++++++++
>  include/hw/virtio/virtio-iommu.h |   6 ++
>  3 files changed, 144 insertions(+)
> 
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index 9196b63..3a3968b 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -39,3 +39,8 @@ virtio_iommu_unmap_left_interval(uint64_t low, uint64_t 
> high, uint64_t next_low,
>  virtio_iommu_unmap_right_interval(uint64_t low, uint64_t high, uint64_t 
> next_low, uint64_t next_high) "Unmap right [0x%"PRIx64",0x%"PRIx64"], new 
> interval=[0x%"PRIx64",0x%"PRIx64"]"
>  virtio_iommu_unmap_inc_interval(uint64_t low, uint64_t high) "Unmap inc 
> [0x%"PRIx64",0x%"PRIx64"]"
>  virtio_iommu_translate_result(uint64_t virt_addr, uint64_t phys_addr, 
> uint32_t sid) "0x%"PRIx64" -> 0x%"PRIx64 " for sid=%d"
> +virtio_iommu_notify_flag_add(const char *iommu) "Add virtio-iommu notifier 
> node for memory region %s"
> +virtio_iommu_notify_flag_del(const char *iommu) "Del virtio-iommu notifier 
> node for memory region %s"
> +virtio_iommu_remap(hwaddr iova, hwaddr pa, hwaddr size) "iova=0x%"PRIx64" 
> pa=0x%" PRIx64" size=0x%"PRIx64""
> +virtio_iommu_map_region(hwaddr iova, hwaddr paddr, hwaddr map_size) 
> "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
> +virtio_iommu_unmap_region(hwaddr iova, hwaddr paddr, hwaddr map_size) 
> "iova=0x%"PRIx64" pa=0x%" PRIx64" size=0x%"PRIx64""
> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
> index cd188fc..61f33cb 100644
> --- a/hw/virtio/virtio-iommu.c
> +++ b/hw/virtio/virtio-iommu.c
> @@ -129,6 +129,48 @@ static gint interval_cmp(gconstpointer a, gconstpointer 
> b, gpointer user_data)
>      }
>  }
>  
> +static void virtio_iommu_map_region(VirtIOIOMMU *s, hwaddr iova, hwaddr 
> paddr,
> +                                    hwaddr size, int map)
bool map?

the function name is a bit misleading to me and does not really explain
what the function does. It "notifies" so why not using something like
virtio_iommu_map_notify and virtio_iommu_unmap_notify. I tend to think
having separate proto is cleaner and more standard.

Binding should happen on a specific IOMMUmemoryRegion (see next comment).

> +{
> +    VirtioIOMMUNotifierNode *node;
> +    IOMMUTLBEntry entry;
> +    uint64_t map_size = (1 << 12);
TODO: handle something else than 4K page.
> +    int npages;
> +    int i;
> +
> +    npages = size / map_size;
> +    entry.target_as = &address_space_memory;
> +    entry.addr_mask = map_size - 1;
> +
> +    for (i = 0; i < npages; i++) {
Although I understand we currently fail checking the consistency between
pIOMMU and vIOMMU page sizes, this will be very slow for guest DPDK use
case where hugepages are used.

Why not directly using the full size?  vfio_iommu_map_notify will report
errors if vfio_dma_map/unmap() fail.
> +        entry.iova = iova + (i * map_size);
> +        if (map) {
> +                trace_virtio_iommu_map_region(iova, paddr, map_size);
> +                entry.perm = IOMMU_RW;
> +                entry.translated_addr = paddr + (i * map_size);
> +        } else {
> +                trace_virtio_iommu_unmap_region(iova, paddr, map_size);
> +                entry.perm = IOMMU_NONE;
> +                entry.translated_addr = 0;
> +        }
> +
> +        QLIST_FOREACH(node, &s->notifiers_list, next) {
> +            memory_region_notify_iommu(&node->iommu_dev->iommu_mr, entry);
So as discussed this will notify *all* IOMMU memory regions and all
their notifiers which is not what we want. You may have a look at
vsmmuv3 v6 (or intel_iommu) where smmuv3_context_device_invalidate
retrieves the mr from the sid.

> +        }
> +    }
> +}
> +
> +static gboolean virtio_iommu_unmap_single(gpointer key, gpointer value,
> +                                          gpointer data)
> +{
> +    viommu_mapping *mapping = (viommu_mapping *) value;
> +    VirtIOIOMMU *s = (VirtIOIOMMU *) data;
> +
> +    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size, 0);
paddr=0? mapping->phys_addr as the trace() will be misleading. But as
mentioned earlier better use unmap() separate function.
> +
> +    return true;
> +}
> +
>  static int virtio_iommu_attach(VirtIOIOMMU *s,
>                                 struct virtio_iommu_req_attach *req)
>  {
> @@ -170,10 +212,26 @@ static int virtio_iommu_detach(VirtIOIOMMU *s,
>  {
>      uint32_t devid = le32_to_cpu(req->device);
>      uint32_t reserved = le32_to_cpu(req->reserved);
> +    viommu_dev *dev;
>      int ret;
>  
>      trace_virtio_iommu_detach(devid, reserved);
>  
> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(devid));
> +    if (!dev || !dev->as) {
> +        return -EINVAL;
> +    }
> +
> +    dev->as->nr_devices--;
> +
> +    /* Unmap all if this is last device detached */
> +    if (dev->as->nr_devices == 0) {
> +        g_tree_foreach(dev->as->mappings, virtio_iommu_unmap_single, s);
> +
> +        g_tree_remove(s->address_spaces, GUINT_TO_POINTER(dev->as->id));
> +        g_tree_destroy(dev->as->mappings);
> +    }
so this should be rebased on new ref count code.
> +
>      ret = g_tree_remove(s->devices, GUINT_TO_POINTER(devid));
>  
>      return ret ? VIRTIO_IOMMU_S_OK : VIRTIO_IOMMU_S_INVAL;
> @@ -217,6 +275,7 @@ static int virtio_iommu_map(VirtIOIOMMU *s,
>  
>      g_tree_insert(as->mappings, interval, mapping);
>  
> +    virtio_iommu_map_region(s, virt_addr, phys_addr, size, 1);
>      return VIRTIO_IOMMU_S_OK;
>  }
>  
> @@ -267,7 +326,9 @@ static int virtio_iommu_unmap(VirtIOIOMMU *s,
>          } else {
>              break;
>          }
> +
>          if (interval.low >= interval.high) {
> +            virtio_iommu_map_region(s, virt_addr, 0, size, 0);
>              return VIRTIO_IOMMU_S_OK;
>          } else {
>              mapping = g_tree_lookup(as->mappings, (gpointer)&interval);
> @@ -410,6 +471,37 @@ static void virtio_iommu_handle_command(VirtIODevice 
> *vdev, VirtQueue *vq)
>      }
>  }
>  
> +static void virtio_iommu_notify_flag_changed(MemoryRegion *iommu,
> +                                          IOMMUNotifierFlag old,
> +                                          IOMMUNotifierFlag new)
> +{
> +    IOMMUDevice *sdev = container_of(iommu, IOMMUDevice, iommu_mr);
> +    VirtIOIOMMU *s = sdev->viommu;
> +    VirtioIOMMUNotifierNode *node = NULL;
> +    VirtioIOMMUNotifierNode *next_node = NULL;
> +
> +    if (old == IOMMU_NOTIFIER_NONE) {
> +        trace_virtio_iommu_notify_flag_add(iommu->name);
> +        node = g_malloc0(sizeof(*node));
> +        node->iommu_dev = sdev;
> +        QLIST_INSERT_HEAD(&s->notifiers_list, node, next);
> +        return;
> +    }
> +
> +    /* update notifier node with new flags */
> +    QLIST_FOREACH_SAFE(node, &s->notifiers_list, next, next_node) {
> +        if (node->iommu_dev == sdev) {
> +            if (new == IOMMU_NOTIFIER_NONE) {
> +                trace_virtio_iommu_notify_flag_del(iommu->name);
> +                QLIST_REMOVE(node, next);
> +                g_free(node);
> +            }
> +            return;
> +        }
> +    }
> +}
I think all that mechanics should be factorized somewhere else as all
vIOMMUs use that but this goes beyond the scope of this series.
> +
> +
>  static IOMMUTLBEntry virtio_iommu_translate(MemoryRegion *mr, hwaddr addr,
>                                              IOMMUAccessFlags flag)
>  {
> @@ -523,11 +615,50 @@ static gint int_cmp(gconstpointer a, gconstpointer b, 
> gpointer user_data)
>      return (ua > ub) - (ua < ub);
>  }
>  
> +static gboolean virtio_iommu_remap(gpointer key, gpointer value, gpointer 
> data)
> +{
> +    viommu_mapping *mapping = (viommu_mapping *) value;
> +    VirtIOIOMMU *s = (VirtIOIOMMU *) data;
> +
> +    trace_virtio_iommu_remap(mapping->virt_addr, mapping->phys_addr,
> +                             mapping->size);
> +    /* unmap previous entry and map again */
> +    virtio_iommu_map_region(s, mapping->virt_addr, 0, mapping->size, 0);
> +
> +    virtio_iommu_map_region(s, mapping->virt_addr, mapping->phys_addr,
> +                            mapping->size, 1);
> +    return true;
> +}
> +
> +static void virtio_iommu_replay(MemoryRegion *mr, IOMMUNotifier *n)
> +{
> +    IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr);
> +    VirtIOIOMMU *s = sdev->viommu;
> +    uint32_t sid;
> +    viommu_dev *dev;
> +
> +    sid = smmu_get_sid(sdev);
> +
> +    qemu_mutex_lock(&s->mutex);
> +
> +    dev = g_tree_lookup(s->devices, GUINT_TO_POINTER(sid));
> +    if (!dev) {
> +        goto unlock;
> +    }
> +
> +    g_tree_foreach(dev->as->mappings, virtio_iommu_remap, s);
> +
> +unlock:
> +    qemu_mutex_unlock(&s->mutex);
> +    return;
not needed

Thanks

Eric
> +}
> +
>  static void virtio_iommu_device_realize(DeviceState *dev, Error **errp)
>  {
>      VirtIODevice *vdev = VIRTIO_DEVICE(dev);
>      VirtIOIOMMU *s = VIRTIO_IOMMU(dev);
>  
> +    QLIST_INIT(&s->notifiers_list);
>      virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU,
>                  sizeof(struct virtio_iommu_config));
>  
> @@ -538,6 +669,8 @@ static void virtio_iommu_device_realize(DeviceState *dev, 
> Error **errp)
>      s->config.input_range.end = -1UL;
>  
>      s->iommu_ops.translate = virtio_iommu_translate;
> +    s->iommu_ops.notify_flag_changed = virtio_iommu_notify_flag_changed;
> +    s->iommu_ops.replay = virtio_iommu_replay;
>      memset(s->as_by_bus_num, 0, sizeof(s->as_by_bus_num));
>      s->as_by_busptr = g_hash_table_new_full(as_uint64_hash,
>                                              as_uint64_equal,
> diff --git a/include/hw/virtio/virtio-iommu.h 
> b/include/hw/virtio/virtio-iommu.h
> index 2259413..76c758d 100644
> --- a/include/hw/virtio/virtio-iommu.h
> +++ b/include/hw/virtio/virtio-iommu.h
> @@ -44,6 +44,11 @@ typedef struct IOMMUPciBus {
>      IOMMUDevice  *pbdev[0]; /* Parent array is sparse, so dynamically alloc 
> */
>  } IOMMUPciBus;
>  
> +typedef struct VirtioIOMMUNotifierNode {
> +    IOMMUDevice *iommu_dev;
> +    QLIST_ENTRY(VirtioIOMMUNotifierNode) next;
> +} VirtioIOMMUNotifierNode;
> +
>  typedef struct VirtIOIOMMU {
>      VirtIODevice parent_obj;
>      VirtQueue *vq;
> @@ -55,6 +60,7 @@ typedef struct VirtIOIOMMU {
>      GTree *address_spaces;
>      QemuMutex mutex;
>      GTree *devices;
> +    QLIST_HEAD(, VirtioIOMMUNotifierNode) notifiers_list;
>  } VirtIOIOMMU;
>  
>  #endif
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]