qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v4 11/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)


From: David Gibson
Subject: Re: [Qemu-devel] [PATCH v4 11/18] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)
Date: Thu, 5 Feb 2015 14:51:44 +1100
User-agent: Mutt/1.5.23 (2014-03-12)

On Thu, Jan 29, 2015 at 08:27:23PM +1100, Alexey Kardashevskiy wrote:
> This implements DDW for emulated and VFIO PHB.
> 
> This removes all DMA windows on reset and creates the default window,
> same is done on the "ibm,reset-pe-dma-window" call.
> This converts sPAPRPHBClass::finish_realize to sPAPRPHBClass::ddw_reset
> and others.
> 
> The "ddw" property is enabled by default on a PHB but for compatibility
> pseries-2.1 machine disables it.

Now that we're past the 2.2 release, this should change to only be
enabled for 2.3+, yes?

> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> Changes:
> v4:
> * reset handler is back in generalized form
> 
> v3:
> * removed reset
> * windows_num is now 1 or bigger rather than 0-based value and it is only
> changed in PHB code, not in RTAS
> * added page mask check in create()
> * added SPAPR_PCI_DDW_MAX_WINDOWS to track how many windows are already
> created
> 
> v2:
> * tested on hacked emulated E1000
> * implemented DDW reset on the PHB reset
> * spapr_pci_ddw_remove/spapr_pci_ddw_reset are public for reuse by VFIO
> 
> spapr_pci_vfio: Enable DDW
> 
> This implements DDW for VFIO. Host kernel support is required for this.
> 
> After this patch DDW will be enabled on all machines but pseries-2.1.
> 
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> Changes:
> v2:
> * remove()/reset() callbacks use spapr_pci's ones
> ---
>  hw/ppc/spapr_pci.c          | 160 
> +++++++++++++++++++++++++++++++++++---------
>  hw/ppc/spapr_pci_vfio.c     |  98 +++++++++++++++++----------
>  include/hw/pci-host/spapr.h |  15 ++++-
>  3 files changed, 203 insertions(+), 70 deletions(-)
> 
> diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
> index 6bd00e8..3ec03be 100644
> --- a/hw/ppc/spapr_pci.c
> +++ b/hw/ppc/spapr_pci.c
> @@ -469,6 +469,126 @@ static const MemoryRegionOps spapr_msi_ops = {
>      .endianness = DEVICE_LITTLE_ENDIAN
>  };
>  
> +static int spapr_phb_get_win_num_cb(Object *child, void *opaque)
> +{
> +    if (object_dynamic_cast(child, TYPE_SPAPR_TCE_TABLE)) {
> +        ++*(unsigned *)opaque;
> +    }
> +    return 0;
> +}
> +
> +unsigned spapr_phb_get_win_num(sPAPRPHBState *sphb)
> +{
> +    unsigned ret = 0;
> +
> +    object_child_foreach(OBJECT(sphb), spapr_phb_get_win_num_cb, &ret);
> +
> +    return ret;
> +}
> +
> +/*
> + * Dynamic DMA windows
> + */
> +static int spapr_pci_ddw_query(sPAPRPHBState *sphb,
> +                               uint32_t *windows_supported,
> +                               uint32_t *page_size_mask,
> +                               uint32_t *dma32_window_size,
> +                               uint64_t *dma64_window_size)
> +{
> +    *windows_supported = SPAPR_PCI_DDW_MAX_WINDOWS;
> +    *page_size_mask = DDW_PGSIZE_64K | DDW_PGSIZE_16M;
> +    *dma32_window_size = SPAPR_PCI_TCE32_WIN_SIZE;
> +    *dma64_window_size = ram_size;
> +
> +    return 0;
> +}
> +
> +static int spapr_pci_ddw_create(sPAPRPHBState *sphb, uint32_t liobn,
> +                                uint32_t page_shift, uint32_t window_shift,
> +                                sPAPRTCETable **ptcet)
> +{
> +    uint64_t bus_offset = spapr_phb_get_win_num(sphb) ?
> +                          SPAPR_PCI_TCE64_START : 0;

Should you also have an assert that spapr_phb_get_win_num(sphb) <= 1 at
this point?

> +
> +    if (((page_shift != 16) && (page_shift != 24) && (page_shift != 12))) {
> +        return -1;

You only have two return values: failure and success.  So is there a
reason you're using an int, rather than returning the sPAPRTCETable *
or NULL?

> +    }
> +
> +    *ptcet = spapr_tce_new_table(DEVICE(sphb), liobn,
> +                                 bus_offset,
> +                                 page_shift,
> +                                 1ULL << (window_shift - page_shift),
> +                                 false);
> +    if (!*ptcet) {
> +        return -1;
> +    }
> +    memory_region_add_subregion(&sphb->iommu_root, (*ptcet)->bus_offset,
> +                                spapr_tce_get_iommu(*ptcet));
> +
> +    return 0;
> +}
> +
> +int spapr_pci_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet)
> +{
> +    memory_region_del_subregion(&sphb->iommu_root,
> +                                spapr_tce_get_iommu(tcet));
> +    spapr_tce_free_table(tcet);
> +
> +    return 0;
> +}
> +
> +static int spapr_pci_remove_ddw_cb(Object *child, void *opaque)
> +{
> +    sPAPRTCETable *tcet;
> +
> +    tcet = (sPAPRTCETable *) object_dynamic_cast(child, 
> TYPE_SPAPR_TCE_TABLE);
> +
> +    if (tcet) {
> +        sPAPRPHBState *sphb = opaque;
> +        sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
> +
> +        spc->ddw_remove(sphb, tcet);
> +    }
> +
> +    return 0;
> +}
> +
> +int spapr_pci_ddw_reset(sPAPRPHBState *sphb)
> +{
> +    int ret;
> +    sPAPRPHBClass *spc;
> +    sPAPRTCETable *tcet;
> +    uint32_t windows_supported = 0, page_size_mask = 0, dma32_window_size = 
> 0;
> +    uint64_t dma64_window_size = 0;
> +
> +    /* Remove all windows */
> +    object_child_foreach(OBJECT(sphb), spapr_pci_remove_ddw_cb, sphb);
> +
> +    /* Create default 32bit window */

This comment seems to belong a few lines down from here.

> +    spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
> +    if (!spc->ddw_create || !spc->ddw_query) {
> +        return -1;
> +    }
> +
> +    ret = spc->ddw_query(sphb, &windows_supported, &page_size_mask,
> +                         &dma32_window_size, &dma64_window_size);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    sphb->ddw_enabled = (windows_supported > 1);

ddw_enabled doesn't actually seem to be tested anywhere.  And
shouldn't it depend on the externally set property for pre-2.3
compat, not just on the number of windows supported by the underlying
implementation?

> +    ret = spc->ddw_create(sphb, SPAPR_PCI_LIOBN(sphb->index, 0),
> +                          SPAPR_TCE_PAGE_SHIFT, ctzl(dma32_window_size), 
> &tcet);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    object_unref(OBJECT(tcet));

This could perhaps do with a comment saying why you've ended up with
an extraneous reference.

> +
> +    return 0;
> +}
> +
>  /*
>   * PHB PCI device
>   */
> @@ -484,7 +604,6 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)
>      SysBusDevice *s = SYS_BUS_DEVICE(dev);
>      sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
>      PCIHostState *phb = PCI_HOST_BRIDGE(s);
> -    sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
>      char *namebuf;
>      int i;
>      PCIBus *bus;
> @@ -622,37 +741,9 @@ static void spapr_phb_realize(DeviceState *dev, Error 
> **errp)
>          sphb->lsi_table[i].irq = irq;
>      }
>  
> -    if (!info->finish_realize) {
> -        error_setg(errp, "finish_realize not defined");
> -        return;
> -    }
> -
> -    info->finish_realize(sphb, errp);
> -
>      sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, 
> g_free);
>  }
>  
> -static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
> -{
> -    sPAPRTCETable *tcet;
> -
> -    tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
> -                               0,
> -                               SPAPR_TCE_PAGE_SHIFT,
> -                               0x40000000 >> SPAPR_TCE_PAGE_SHIFT, false);
> -    if (!tcet) {
> -        error_setg(errp, "Unable to create TCE table for %s",
> -                   sphb->dtbusname);
> -        return ;
> -    }
> -
> -    /* Register default 32bit DMA window */
> -    memory_region_add_subregion(&sphb->iommu_root, 0,
> -                                spapr_tce_get_iommu(tcet));
> -
> -    object_unref(OBJECT(tcet));
> -}
> -
>  static int spapr_phb_children_reset(Object *child, void *opaque)
>  {
>      DeviceState *dev = (DeviceState *) object_dynamic_cast(child, 
> TYPE_DEVICE);
> @@ -666,7 +757,11 @@ static int spapr_phb_children_reset(Object *child, void 
> *opaque)
>  
>  static void spapr_phb_reset(DeviceState *qdev)
>  {
> -    /* Reset the IOMMU state */
> +    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(qdev);
> +
> +    if (spc->ddw_reset) {
> +        spc->ddw_reset(SPAPR_PCI_HOST_BRIDGE(qdev));
> +    }
>      object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
>  }
>  
> @@ -801,7 +896,10 @@ static void spapr_phb_class_init(ObjectClass *klass, 
> void *data)
>      dc->vmsd = &vmstate_spapr_pci;
>      set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
>      dc->cannot_instantiate_with_device_add_yet = false;
> -    spc->finish_realize = spapr_phb_finish_realize;
> +    spc->ddw_query = spapr_pci_ddw_query;
> +    spc->ddw_create = spapr_pci_ddw_create;
> +    spc->ddw_remove = spapr_pci_ddw_remove;
> +    spc->ddw_reset = spapr_pci_ddw_reset;
>  }
>  
>  static const TypeInfo spapr_phb_info = {
> diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
> index aabf0ae..b20ac90 100644
> --- a/hw/ppc/spapr_pci_vfio.c
> +++ b/hw/ppc/spapr_pci_vfio.c
> @@ -27,65 +27,89 @@ static Property spapr_phb_vfio_properties[] = {
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> -static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
> +static int spapr_pci_vfio_ddw_query(sPAPRPHBState *sphb,
> +                                    uint32_t *windows_supported,
> +                                    uint32_t *page_size_mask,
> +                                    uint32_t *dma32_window_size,
> +                                    uint64_t *dma64_window_size)
>  {
>      sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
>      struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
>      int ret;
> -    sPAPRTCETable *tcet;
> -    uint32_t liobn = svphb->phb.dma_liobn;
>  
> -    if (svphb->iommugroupid == -1) {
> -        error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
> -        return;
> -    }
> -
> -    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> -                               VFIO_CHECK_EXTENSION,
> -                               (void *) VFIO_SPAPR_TCE_IOMMU);
> -    if (ret != 1) {
> -        error_setg_errno(errp, -ret,
> -                         "spapr-vfio: SPAPR extension is not supported");
> -        return;
> -    }
> -
> -    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> +    ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid,
>                                 VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
>      if (ret) {
> -        error_setg_errno(errp, -ret,
> -                         "spapr-vfio: get info from container failed");
> -        return;
> +        return ret;
>      }
>  
> -    tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
> -                               SPAPR_TCE_PAGE_SHIFT,
> -                               info.dma32_window_size >> 
> SPAPR_TCE_PAGE_SHIFT,
> -                               true);
> -    if (!tcet) {
> -        error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
> -        return;
> +    *windows_supported = info.windows_supported;
> +    *page_size_mask = info.flags & DDW_PGSIZE_MASK;
> +    *dma32_window_size = info.dma32_window_size;
> +    *dma64_window_size = ram_size;
> +
> +    return ret;
> +}
> +
> +static int spapr_pci_vfio_ddw_create(sPAPRPHBState *sphb, uint32_t liobn,
> +                                     uint32_t page_shift, uint32_t 
> window_shift,
> +                                     sPAPRTCETable **ptcet)
> +{
> +    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
> +    struct vfio_iommu_spapr_tce_create create = {
> +        .argsz = sizeof(create),
> +        .page_shift = page_shift,
> +        .window_shift = window_shift,
> +        .levels = 1,
> +        .start_addr = 0,
> +    };
> +    int ret;
> +
> +    ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid,
> +                               VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
> +    if (ret) {
> +        return ret;
>      }
>  
> -    /* Register default 32bit DMA window */
> -    memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
> -                                spapr_tce_get_iommu(tcet));
> +    *ptcet = spapr_tce_new_table(DEVICE(sphb), liobn,
> +                                 create.start_addr,
> +                                 page_shift,
> +                                 1ULL << (window_shift - page_shift),
> +                                 true);
> +    if (!*ptcet) {
> +        return -1;
> +    }
> +    memory_region_add_subregion(&sphb->iommu_root, (*ptcet)->bus_offset,
> +                                spapr_tce_get_iommu(*ptcet));
>  
> -    object_unref(OBJECT(tcet));
> +    return ret;
>  }
>  
> -static void spapr_phb_vfio_reset(DeviceState *qdev)
> +static int spapr_pci_vfio_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable 
> *tcet)
>  {
> -    /* Do nothing */
> +    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
> +    struct vfio_iommu_spapr_tce_remove remove = {
> +        .argsz = sizeof(remove),
> +        .start_addr = tcet->bus_offset
> +    };
> +    int ret;
> +
> +    spapr_pci_ddw_remove(sphb, tcet);
> +    ret = vfio_container_ioctl(&sphb->iommu_as, svphb->iommugroupid,
> +                               VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
> +
> +    return ret;
>  }
>  
>  static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
>  {
> -    DeviceClass *dc = DEVICE_CLASS(klass);
>      sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
> +    DeviceClass *dc = DEVICE_CLASS(klass);
>  
>      dc->props = spapr_phb_vfio_properties;
> -    dc->reset = spapr_phb_vfio_reset;
> -    spc->finish_realize = spapr_phb_vfio_finish_realize;
> +    spc->ddw_query = spapr_pci_vfio_ddw_query;
> +    spc->ddw_create = spapr_pci_vfio_ddw_create;
> +    spc->ddw_remove = spapr_pci_vfio_ddw_remove;
>  }
>  
>  static const TypeInfo spapr_phb_vfio_info = {
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index eec95f3..577f908 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -48,8 +48,6 @@ typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
>  struct sPAPRPHBClass {
>      PCIHostBridgeClass parent_class;
>  
> -    void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
> -
>  /* sPAPR spec defined pagesize mask values */
>  #define DDW_PGSIZE_4K       0x01
>  #define DDW_PGSIZE_64K      0x02
> @@ -106,6 +104,8 @@ struct sPAPRPHBState {
>      int32_t msi_devs_num;
>      spapr_pci_msi_mig *msi_devs;
>  
> +    bool ddw_enabled;
> +
>      QLIST_ENTRY(sPAPRPHBState) list;
>  };
>  
> @@ -129,6 +129,14 @@ struct sPAPRPHBVFIOState {
>  
>  #define SPAPR_PCI_MSI_WINDOW         0x40000000000ULL
>  
> +#define SPAPR_PCI_TCE32_WIN_SIZE     0x80000000ULL
> +
> +/* Default 64bit dynamic window offset */
> +#define SPAPR_PCI_TCE64_START        0x8000000000000000ULL
> +
> +/* Maximum allowed number of DMA windows for emulated PHB */
> +#define SPAPR_PCI_DDW_MAX_WINDOWS    2
> +
>  static inline qemu_irq spapr_phb_lsi_qirq(struct sPAPRPHBState *phb, int pin)
>  {
>      return xics_get_qirq(spapr->icp, phb->lsi_table[pin].irq);
> @@ -147,5 +155,8 @@ void spapr_pci_rtas_init(void);
>  sPAPRPHBState *spapr_pci_find_phb(sPAPREnvironment *spapr, uint64_t buid);
>  PCIDevice *spapr_pci_find_dev(sPAPREnvironment *spapr, uint64_t buid,
>                                uint32_t config_addr);
> +int spapr_pci_ddw_remove(sPAPRPHBState *sphb, sPAPRTCETable *tcet);
> +int spapr_pci_ddw_reset(sPAPRPHBState *sphb);
> +unsigned spapr_phb_get_win_num(sPAPRPHBState *sphb);
>  
>  #endif /* __HW_SPAPR_PCI_H__ */

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: pgprsYw9mUZjS.pgp
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]