qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vI


From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH][RFC] IOMMU: Add Support to VFIO devices with vIOMMU present
Date: Wed, 23 Mar 2016 16:34:42 +0200

On Thu, Mar 17, 2016 at 01:58:13PM +0200, Aviv B.D. wrote:
> 
> 
> On Tue, Mar 15, 2016 at 12:53 PM, Michael S. Tsirkin <address@hidden> wrote:
> 
>     On Sat, Mar 12, 2016 at 06:13:17PM +0200, Aviv B.D. wrote:
>     > From: "Aviv B.D." <address@hidden>
>     >
>     >  * Fix bug that prevents qemu from starting up when vIOMMU and VFIO
>     >     device are present.
>     >  * Advertise Cache Mode capability in iommu cap register.
>     >  * Register every VFIO device with IOMMU state.
>     >  * On page cache invalidation in vIOMMU, check if the domain belongs to
>     >    VFIO device and mirror the guest requests to host.
>     >
>     > Not working (Yet!):
>     >  * Tested only with network interface card (ixgbevf) and
>     >     intel_iommu=strict in guest's kernel command line.
>     >  * Lock up under high load.
>     >  * Errors on guest poweroff.
>     >  * High relative latency compared to VFIO without IOMMU.
>     >
>     > Signed-off-by: Aviv B.D. <address@hidden>
> 
>     Thanks, this is very interesting.
>     So this needs some cleanup, and some issues that will have to be 
> addressed.
>     See below.
>     Thanks!
> 
> 
> Thanks! 
> 
> 
>     > ---
>     >  hw/i386/intel_iommu.c          | 76
>     ++++++++++++++++++++++++++++++++++++++----
>     >  hw/i386/intel_iommu_internal.h |  1 +
>     >  hw/vfio/common.c               | 12 +++++--
>     >  include/hw/i386/intel_iommu.h  |  4 +++
>     >  include/hw/vfio/vfio-common.h  |  1 +
>     >  5 files changed, 85 insertions(+), 9 deletions(-)
>     >
>     > diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>     > index 347718f..046688f 100644
>     > --- a/hw/i386/intel_iommu.c
>     > +++ b/hw/i386/intel_iommu.c
>     > @@ -43,6 +44,9 @@ static int vtd_dbgflags = VTD_DBGBIT(GENERAL) |
>     VTD_DBGBIT
>     > (CSR);
>     >  #define VTD_DPRINTF(what, fmt, ...) do {} while (0)
>     >  #endif
>     >  
>     > +static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t 
> bus_num,
>     > +                                    uint8_t devfn, VTDContextEntry 
> *ce);
>     > +
>     >  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t
>     val,
>     >                              uint64_t wmask, uint64_t w1cmask)
>     >  {
>     > @@ -126,6 +130,19 @@ static uint32_t vtd_set_clear_mask_long
>     (IntelIOMMUState
>     > *s, hwaddr addr,
>     >      return new_val;
>     >  }
>     >  
>     > +static uint16_t vtd_get_did_dev(IntelIOMMUState *s, uint8_t bus_num,
>     uint8_t
>     > devfn)
>     > +{
>     > +    VTDContextEntry ce;
>     > +    int ret_fr;
>     > +
>     > +    ret_fr = vtd_dev_to_context_entry(s, bus_num, devfn, &ce);
>     > +    if (ret_fr){
>     > +        return -1;
>     > +    }
>     > +
>     > +    return VTD_CONTEXT_ENTRY_DID(ce.hi);
>     > +}
>     > +
>     >  static uint64_t vtd_set_clear_mask_quad(IntelIOMMUState *s, hwaddr 
> addr,
>     >                                          uint64_t clear, uint64_t mask)
>     >  {
>     > @@ -711,9 +728,9 @@ static int vtd_dev_to_context_entry(IntelIOMMUState
>     *s,
>     > uint8_t bus_num,
>     >      }
>     >  
>     >      if (!vtd_context_entry_present(ce)) {
>     > -        VTD_DPRINTF(GENERAL,
>     > +        /*VTD_DPRINTF(GENERAL,
>     >                      "error: context-entry #%"PRIu8 "(bus #%"PRIu8 ") "
>     > -                    "is not present", devfn, bus_num);
>     > +                    "is not present", devfn, bus_num);*/
>     >          return -VTD_FR_CONTEXT_ENTRY_P;
>     >      } else if ((ce->hi & VTD_CONTEXT_ENTRY_RSVD_HI) ||
>     >                 (ce->lo & VTD_CONTEXT_ENTRY_RSVD_LO)) {
>     > @@ -1020,14 +1037,53 @@ static void vtd_iotlb_page_invalidate
>     (IntelIOMMUState
>     > *s, uint16_t domain_id,
>     >                                        hwaddr addr, uint8_t am)
>     >  {
>     >      VTDIOTLBPageInvInfo info;
>     > +    VFIOGuestIOMMU * giommu;
>     > +    bool flag = false;
>     >  
>     >      assert(am <= VTD_MAMV);
>     >      info.domain_id = domain_id;
>     >      info.addr = addr;
>     >      info.mask = ~((1 << am) - 1);
>     > +
>     > +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
>     > +        VTDAddressSpace *vtd_as = container_of(giommu->iommu,
>     VTDAddressSpace,
>     > iommu);
>     > +        uint16_t vfio_source_id = vtd_make_source_id(pci_bus_num
>     (vtd_as->bus),
>     > vtd_as->devfn);
>     > +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num
>     (vtd_as->bus),
>     > vtd_as->devfn);
>     > +        if (vfio_domain_id != (uint16_t)-1 && 
>     > +                domain_id == vfio_domain_id){
>     > +            VTDIOTLBEntry *iotlb_entry = vtd_lookup_iotlb(s,
>     vfio_source_id,
>     > addr);
>     > +            if (iotlb_entry != NULL){
>     > +                IOMMUTLBEntry entry; 
>     > +                VTD_DPRINTF(GENERAL, "Remove addr 0x%"PRIx64 " mask 
> %d",
>     addr,
>     > am);
>     > +                entry.iova = addr & VTD_PAGE_MASK_4K;
>     > +                entry.translated_addr = 
> vtd_get_slpte_addr(iotlb_entry->
>     slpte)
>     > & VTD_PAGE_MASK_4K;
>     > +                entry.addr_mask = ~VTD_PAGE_MASK_4K;
>     > +                entry.perm = IOMMU_NONE;
>     > +                memory_region_notify_iommu(giommu->iommu, entry);
>     > +                flag = true;
>     > +
>     > +            }
>     > +        }
>     > +
>     > +    }
>     > +
>     >      g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page, &
>     info);
>     > -}
>     >  
>     > +    QLIST_FOREACH(giommu, &(s->giommu_list), iommu_next){
>     > +        VTDAddressSpace *vtd_as = container_of(giommu->iommu,
>     VTDAddressSpace,
>     > iommu);
>     > +        uint16_t vfio_domain_id = vtd_get_did_dev(s, pci_bus_num
>     (vtd_as->bus),
>     > vtd_as->devfn);
>     > +        if (vfio_domain_id != (uint16_t)-1 && 
>     > +                domain_id == vfio_domain_id && !flag){
>     > +            /* do vfio map */
>     > +            VTD_DPRINTF(GENERAL, "add addr 0x%"PRIx64 " mask %d", addr,
>     am);
>     > +            /* call to vtd_iommu_translate */
>     > +            IOMMUTLBEntry entry = s->iommu_ops.translate(giommu->iommu,
>     addr,
>     > 0); 
>     > +            entry.perm = IOMMU_RW;
>     > +            memory_region_notify_iommu(giommu->iommu, entry);
> 
>     So this just triggers the iommu notifiers in the regular way.
>     But what if two devices have conflicting entries?
>     We need to setup host IOMMU differently for them.
>     But see below
> 
>     > +            //g_vfio_iommu->n.notify(&g_vfio_iommu->n, &entry);
>     > +        }
>     > +    }
>     > +}
>     >  /* Flush IOTLB
>     >   * Returns the IOTLB Actual Invalidation Granularity.
>     >   * @val: the content of the IOTLB_REG
>     > @@ -1895,6 +1951,13 @@ static Property vtd_properties[] = {
>     >      DEFINE_PROP_END_OF_LIST(),
>     >  };
>     >  
>     > +void vtd_register_giommu(VFIOGuestIOMMU * giommu)
>     > +{
>     > +    VTDAddressSpace *vtd_as = container_of(giommu->iommu,
>     VTDAddressSpace,
>     > iommu);
>     > +    IntelIOMMUState *s = vtd_as->iommu_state;
>     > +    
>     > +    QLIST_INSERT_HEAD(&s->giommu_list, giommu, iommu_next);
>     > +}
>     >  
>     >  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int
>     devfn)
>     >  {
>     > @@ -1949,7 +2012,8 @@ static void vtd_init(IntelIOMMUState *s)
>     >      s->iq_last_desc_type = VTD_INV_DESC_NONE;
>     >      s->next_frcd_reg = 0;
>     >      s->cap = VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | VTD_CAP_MGAW |
>     > -             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | 
> VTD_CAP_SLLPS;
>     > +             VTD_CAP_SAGAW | VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS
>     |
>     > +             VTD_CAP_CM;
>     >      s->ecap = VTD_ECAP_QI | VTD_ECAP_IRO;
>     >  
>     >      vtd_reset_context_cache(s);
>     > diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/
>     intel_iommu_internal.h
>     > index e5f514c..ae40f73 100644
>     > --- a/hw/i386/intel_iommu_internal.h
>     > +++ b/hw/i386/intel_iommu_internal.h
>     > @@ -190,6 +190,7 @@
>     >  #define VTD_CAP_MAMV                (VTD_MAMV << 48)
>     >  #define VTD_CAP_PSI                 (1ULL << 39)
>     >  #define VTD_CAP_SLLPS               ((1ULL << 34) | (1ULL << 35))
>     > +#define VTD_CAP_CM                  (1ULL << 7)
>     >  
>     >  /* Supported Adjusted Guest Address Widths */
>     >  #define VTD_CAP_SAGAW_SHIFT         8
>     > diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>     > index 607ec70..98c8d67 100644
>     > --- a/hw/vfio/common.c
>     > +++ b/hw/vfio/common.c
>     > @@ -32,6 +32,9 @@
>     >  #include "sysemu/kvm.h"
>     >  #include "trace.h"
>     >  
>     > +#include "hw/sysbus.h"
>     > +#include "hw/i386/intel_iommu.h"
>     > +
>     >  struct vfio_group_head vfio_group_list =
>     >      QLIST_HEAD_INITIALIZER(vfio_group_list);
>     >  struct vfio_as_head vfio_address_spaces =
>     > @@ -312,12 +315,12 @@ static void vfio_iommu_map_notify(Notifier *n, 
> void
>     > *data)
>     >  out:
>     >      rcu_read_unlock();
>     >  }
>     > -
>     > +#if 0
>     >  static hwaddr vfio_container_granularity(VFIOContainer *container)
>     >  {
>     >      return (hwaddr)1 << ctz64(container->iova_pgsizes);
>     >  }
>     > -
>     > +#endif
>     >  static void vfio_listener_region_add(MemoryListener *listener,
>     >                                       MemoryRegionSection *section)
>     >  {
>     > @@ -344,6 +347,7 @@ static void vfio_listener_region_add(MemoryListener
>     > *listener,
>     >      iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
>     >      llend = int128_make64(section->offset_within_address_space);
>     >      llend = int128_add(llend, section->size);
>     > +    llend = int128_add(llend, int128_exts64(-1));
>     >      llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
>     >  
>     >      if (int128_ge(int128_make64(iova), llend)) {
>     > @@ -381,11 +385,13 @@ static void 
> vfio_listener_region_add(MemoryListener
>     > *listener,
>     >          giommu->n.notify = vfio_iommu_map_notify;
>     >          QLIST_INSERT_HEAD(&container->giommu_list, giommu, 
> giommu_next);
>     >  
>     > +        vtd_register_giommu(giommu);
>     >          memory_region_register_iommu_notifier(giommu->iommu, &giommu->
>     n);
>     > +#if 0
>     >          memory_region_iommu_replay(giommu->iommu, &giommu->n,
>     >                                     vfio_container_granularity
>     (container),
>     >                                     false);
>     > -
>     > +#endif
>     >          return;
>     >      }
>     >
> 
>     Looks like there is still a single global domain on the host.
>     Works up to a point as long as we have a single VFIO device.
> 
> 
> You are correct, I should call (from page_invalidation) only the correct
> vfio notifier that belongs to this domain. This way (as long as no conflicting
> mapping is present) several VFIO devices can work at the same time. I'll add
> this feature to the next version of the patch.
> Currently I am not sure how the code should handle conflicting mappings between
> two different domains. 

If you have different vfio domains, then what is the
issue with fixing it?

> 
>      
>     > diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/
>     intel_iommu.h
>     > index b024ffa..22f3f83 100644
>     > --- a/include/hw/i386/intel_iommu.h
>     > +++ b/include/hw/i386/intel_iommu.h
>     > @@ -23,6 +23,7 @@
>     >  #define INTEL_IOMMU_H
>     >  #include "hw/qdev.h"
>     >  #include "sysemu/dma.h"
>     > +#include "hw/vfio/vfio-common.h"
>     >  
>     >  #define TYPE_INTEL_IOMMU_DEVICE "intel-iommu"
>     >  #define INTEL_IOMMU_DEVICE(obj) \
>     > @@ -123,6 +124,8 @@ struct IntelIOMMUState {
>     >      MemoryRegionIOMMUOps iommu_ops;
>     >      GHashTable *vtd_as_by_busptr;   /* VTDBus objects indexed by 
> PCIBus*
>     > reference */
>     >      VTDBus *vtd_as_by_bus_num[VTD_PCI_BUS_MAX]; /* VTDBus objects
>     indexed by
>     > bus number */
>     > +
>     > +    QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
>     >  };
>     >  
>     >  /* Find the VTD Address space associated with the given bus pointer,
>     > @@ -130,4 +133,5 @@ struct IntelIOMMUState {
>     >   */
>     >  VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus, int
>     devfn);
>     >  
>     > +void vtd_register_giommu(VFIOGuestIOMMU * giommu);
>     >  #endif
>     > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/
>     vfio-common.h
>     > index f037f3c..9225ba3 100644
>     > --- a/include/hw/vfio/vfio-common.h
>     > +++ b/include/hw/vfio/vfio-common.h
>     > @@ -82,6 +82,7 @@ typedef struct VFIOGuestIOMMU {
>     >      MemoryRegion *iommu;
>     >      Notifier n;
>     >      QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
>     > +    QLIST_ENTRY(VFIOGuestIOMMU) iommu_next;
>     >  } VFIOGuestIOMMU;
>     >  
>     >  typedef struct VFIODeviceOps VFIODeviceOps;
>     > -- 
>     > 1.9.1
>     >  
> 
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]