qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH qemu 1/2] spapr_pci_vfio: Remove redundant spapr-p


From: David Gibson
Subject: Re: [Qemu-ppc] [PATCH qemu 1/2] spapr_pci_vfio: Remove redundant spapr-pci-vfio-host-bridge
Date: Thu, 3 Sep 2015 12:05:45 +1000
User-agent: Mutt/1.5.23 (2014-03-12)

On Wed, Sep 02, 2015 at 06:16:02PM +1000, Alexey Kardashevskiy wrote:
> sPAPRTCETable is handling 2 TCE tables already:
> 
> 1) guest view of the TCE table - emulated devices use only this table;
> 
> 2) hardware IOMMU table - VFIO PCI devices use it for actual work but
> it does not replace 1) and it is not visible to the guest.
> The initialization of this table is driven by vfio-pci device,
> DMA map/unmap requests are handled via MemoryListener so there is very
> little to do in spapr-pci-vfio-host-bridge.
> 
> This moves VFIO bits to the generic spapr-pci-host-bridge which allows
> putting emulated and VFIO devices on the same PHB. It is still possible
> to create multiple PHBs and avoid sharing PHB resouces for emulated and
> VFIO devices.
> 
> If there is no VFIO-PCI device attaches, no special ioctls will be called.
> If there are some VFIO-PCI devices attached, PHB may refuse to attach
> another VFIO-PCI device if a VFIO container on the host kernel side
> does not support container sharing.
> 
> This makes spapr-pci-vfio-host-bridge type equal to spapr-pci-host-bridge
> except it has an additional "iommu" property so spapr-pci-vfio-host-bridge
> still should be used for VFIO devices. The next patch will remove IOMMU ID
> property and allow putting VFIO-PCI devices onto spapr-pci-host-bridge.
> 
> This adds a number of VFIO-PCI devices currently attached to a PHB as
> PHB needs to know whether to do DMA setup for VFIO or not. Since
> at the moment of the PHB's realize() invocation we cannot tell yet
> how many VFIO-PCI devices are there (they are not attached yet),
> this moves DMA setup to the reset handler.
> 
> This moves PCI device lookup from spapr_phb_vfio_eeh_set_option() to
> rtas_ibm_set_eeh_option() as we need to know if the device is "vfio-pci"
> and decide whether to call spapr_phb_vfio_eeh_set_option() or not.
> 
> This should cause no behavioural change.
> 
> Signed-off-by: Alexey Kardashevskiy <address@hidden>

[snip]
>  static int spapr_phb_children_reset(Object *child, void *opaque)
>  {
>      DeviceState *dev = (DeviceState *) object_dynamic_cast(child, 
> TYPE_DEVICE);
> @@ -1413,8 +1401,42 @@ static int spapr_phb_children_reset(Object *child, 
> void *opaque)
>  
>  static void spapr_phb_reset(DeviceState *qdev)
>  {
> +    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
> +    sPAPRTCETable *tcet;
> +
>      /* Reset the IOMMU state */
>      object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
> +
> +    if (spapr_phb_dma_capabilities_update(sphb)) {
> +        return;
> +    }
> +
> +    /* Register default 32bit DMA window */
> +    tcet = spapr_tce_find_by_liobn(sphb->dma_liobn);
> +    if (!tcet) {
> +        const unsigned nb = sphb->dma32_window_size >> SPAPR_TCE_PAGE_SHIFT;
> +        tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
> +                                   sphb->dma32_window_start,
> +                                   SPAPR_TCE_PAGE_SHIFT, nb,
> +                                   sphb->vfio_num > 0);

Could delaying the construction of the TCE table object until reset
time cause problems with migration?  i.e. can you be sure that the
destination will have the TCE table object present and in a suitable
state to accept the incoming table information from the source?

> +        if (!tcet) {
> +            error_report("No default TCE table for %s", sphb->dtbusname);
> +            return;
> +        }
> +
> +        memory_region_add_subregion(&sphb->iommu_root, 0,
> +                                    spapr_tce_get_iommu(tcet));
> +    }
> +
> +    if (sphb->vfio_num) {
> +        /*
> +         * The PE might be in frozen state. To reenable the EEH
> +         * functionality on it will clean the frozen state, which
> +         * ensures that the contained PCI devices will work properly
> +         * after reboot.
> +         */
> +        spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
> +    }
>  }
>  
>  static Property spapr_phb_properties[] = {
> @@ -1535,7 +1557,6 @@ static void spapr_phb_class_init(ObjectClass *klass, 
> void *data)
>  {
>      PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
>      DeviceClass *dc = DEVICE_CLASS(klass);
> -    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
>      HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);
>  
>      hc->root_bus_path = spapr_phb_root_bus_path;
> @@ -1545,7 +1566,6 @@ static void spapr_phb_class_init(ObjectClass *klass, 
> void *data)
>      dc->vmsd = &vmstate_spapr_pci;
>      set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
>      dc->cannot_instantiate_with_device_add_yet = false;
> -    spc->finish_realize = spapr_phb_finish_realize;
>      hp->plug = spapr_phb_hot_plug_child;
>      hp->unplug = spapr_phb_hot_unplug_child;
>  }
> diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
> index cca45ed..f94d8a4 100644
> --- a/hw/ppc/spapr_pci_vfio.c
> +++ b/hw/ppc/spapr_pci_vfio.c
> @@ -20,84 +20,47 @@
>  #include "hw/ppc/spapr.h"
>  #include "hw/pci-host/spapr.h"
>  #include "hw/pci/msix.h"
> -#include "linux/vfio.h"
>  #include "hw/vfio/vfio.h"
>  
> +#ifdef CONFIG_LINUX
> +#include "linux/vfio.h"
> +
>  static Property spapr_phb_vfio_properties[] = {
> -    DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
> +    DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> -static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
> +int spapr_phb_vfio_dma_capabilities_update(sPAPRPHBState *sphb)
>  {
> -    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
>      struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
>      int ret;
> -    sPAPRTCETable *tcet;
> -    uint32_t liobn = svphb->phb.dma_liobn;
>  
> -    if (svphb->iommugroupid == -1) {
> -        error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
> -        return;
> -    }
> -
> -    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> -                               VFIO_CHECK_EXTENSION,
> -                               (void *) VFIO_SPAPR_TCE_IOMMU);
> -    if (ret != 1) {
> -        error_setg_errno(errp, -ret,
> -                         "spapr-vfio: SPAPR extension is not supported");
> -        return;
> -    }
> -
> -    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> +    ret = vfio_container_ioctl(&sphb->iommu_as, sphb->iommugroupid,
>                                 VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
>      if (ret) {
> -        error_setg_errno(errp, -ret,
> -                         "spapr-vfio: get info from container failed");
> -        return;
> +        return ret;
>      }
>  
> -    tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
> -                               SPAPR_TCE_PAGE_SHIFT,
> -                               info.dma32_window_size >> 
> SPAPR_TCE_PAGE_SHIFT,
> -                               true);
> -    if (!tcet) {
> -        error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
> -        return;
> -    }
> +    sphb->dma32_window_start = info.dma32_window_start;
> +    sphb->dma32_window_size = info.dma32_window_size;
>  
> -    /* Register default 32bit DMA window */
> -    memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
> -                                spapr_tce_get_iommu(tcet));
> +    return ret;
>  }
>  
> -static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb)
> +void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb)
>  {
>      struct vfio_eeh_pe_op op = {
>          .argsz = sizeof(op),
>          .op    = VFIO_EEH_PE_ENABLE
>      };
>  
> -    vfio_container_ioctl(&svphb->phb.iommu_as,
> -                         svphb->iommugroupid, VFIO_EEH_PE_OP, &op);
> +    vfio_container_ioctl(&sphb->iommu_as,
> +                         sphb->iommugroupid, VFIO_EEH_PE_OP, &op);
>  }
>  
> -static void spapr_phb_vfio_reset(DeviceState *qdev)
> +int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
> +                                  PCIDevice *pdev, int option)
>  {
> -    /*
> -     * The PE might be in frozen state. To reenable the EEH
> -     * functionality on it will clean the frozen state, which
> -     * ensures that the contained PCI devices will work properly
> -     * after reboot.
> -     */
> -    spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev));
> -}
> -
> -static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
> -                                         unsigned int addr, int option)
> -{
> -    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
>      struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
>      int ret;
>  
> @@ -105,25 +68,9 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState 
> *sphb,
>      case RTAS_EEH_DISABLE:
>          op.op = VFIO_EEH_PE_DISABLE;
>          break;
> -    case RTAS_EEH_ENABLE: {
> -        PCIHostState *phb;
> -        PCIDevice *pdev;
> -
> -        /*
> -         * The EEH functionality is enabled on basis of PCI device,
> -         * instead of PE. We need check the validity of the PCI
> -         * device address.
> -         */
> -        phb = PCI_HOST_BRIDGE(sphb);
> -        pdev = pci_find_device(phb->bus,
> -                               (addr >> 16) & 0xFF, (addr >> 8) & 0xFF);
> -        if (!pdev) {
> -            return RTAS_OUT_PARAM_ERROR;
> -        }
> -
> +    case RTAS_EEH_ENABLE:
>          op.op = VFIO_EEH_PE_ENABLE;
>          break;
> -    }
>      case RTAS_EEH_THAW_IO:
>          op.op = VFIO_EEH_PE_UNFREEZE_IO;
>          break;
> @@ -134,7 +81,7 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState 
> *sphb,
>          return RTAS_OUT_PARAM_ERROR;
>      }
>  
> -    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> +    ret = vfio_container_ioctl(&sphb->iommu_as, sphb->iommugroupid,
>                                 VFIO_EEH_PE_OP, &op);
>      if (ret < 0) {
>          return RTAS_OUT_HW_ERROR;
> @@ -143,14 +90,13 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState 
> *sphb,
>      return RTAS_OUT_SUCCESS;
>  }
>  
> -static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state)
> +int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state)
>  {
> -    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
>      struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
>      int ret;
>  
>      op.op = VFIO_EEH_PE_GET_STATE;
> -    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> +    ret = vfio_container_ioctl(&sphb->iommu_as, sphb->iommugroupid,
>                                 VFIO_EEH_PE_OP, &op);
>      if (ret < 0) {
>          return RTAS_OUT_PARAM_ERROR;
> @@ -203,9 +149,8 @@ static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState 
> *sphb)
>         pci_for_each_bus(phb->bus, spapr_phb_vfio_eeh_clear_bus_msix, NULL);
>  }
>  
> -static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
> +int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
>  {
> -    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
>      struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
>      int ret;
>  
> @@ -225,7 +170,7 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, 
> int option)
>          return RTAS_OUT_PARAM_ERROR;
>      }
>  
> -    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> +    ret = vfio_container_ioctl(&sphb->iommu_as, sphb->iommugroupid,
>                                 VFIO_EEH_PE_OP, &op);
>      if (ret < 0) {
>          return RTAS_OUT_HW_ERROR;
> @@ -234,14 +179,13 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState 
> *sphb, int option)
>      return RTAS_OUT_SUCCESS;
>  }
>  
> -static int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb)
> +int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb)
>  {
> -    sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
>      struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
>      int ret;
>  
>      op.op = VFIO_EEH_PE_CONFIGURE;
> -    ret = vfio_container_ioctl(&svphb->phb.iommu_as, svphb->iommugroupid,
> +    ret = vfio_container_ioctl(&sphb->iommu_as, sphb->iommugroupid,
>                                 VFIO_EEH_PE_OP, &op);
>      if (ret < 0) {
>          return RTAS_OUT_PARAM_ERROR;
> @@ -253,23 +197,14 @@ static int spapr_phb_vfio_eeh_configure(sPAPRPHBState 
> *sphb)
>  static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
> -    sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
>  
>      dc->props = spapr_phb_vfio_properties;
> -    dc->reset = spapr_phb_vfio_reset;
> -    spc->finish_realize = spapr_phb_vfio_finish_realize;
> -    spc->eeh_set_option = spapr_phb_vfio_eeh_set_option;
> -    spc->eeh_get_state = spapr_phb_vfio_eeh_get_state;
> -    spc->eeh_reset = spapr_phb_vfio_eeh_reset;
> -    spc->eeh_configure = spapr_phb_vfio_eeh_configure;
>  }
>  
>  static const TypeInfo spapr_phb_vfio_info = {
>      .name          = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
>      .parent        = TYPE_SPAPR_PCI_HOST_BRIDGE,
> -    .instance_size = sizeof(sPAPRPHBVFIOState),
>      .class_init    = spapr_phb_vfio_class_init,
> -    .class_size    = sizeof(sPAPRPHBClass),
>  };
>  
>  static void spapr_pci_vfio_register_types(void)
> @@ -278,3 +213,36 @@ static void spapr_pci_vfio_register_types(void)
>  }
>  
>  type_init(spapr_pci_vfio_register_types)
> +
> +#else /* !CONFIG_LINUX */
> +
> +int spapr_phb_vfio_dma_capabilities_update(sPAPRPHBState *sphb)
> +{
> +    return -EINVAL;
> +}
> +
> +int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
> +                                  PCIDevice *pdev, int option)
> +{
> +    return RTAS_OUT_HW_ERROR;
> +}
> +
> +int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state)
> +{
> +    return RTAS_OUT_HW_ERROR;
> +}
> +
> +int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
> +{
> +    return RTAS_OUT_HW_ERROR;
> +}
> +
> +int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb)
> +{
> +    return RTAS_OUT_HW_ERROR;
> +}
> +
> +void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb)
> +{
> +}
> +#endif
> diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
> index 5322b56..7f3c712 100644
> --- a/include/hw/pci-host/spapr.h
> +++ b/include/hw/pci-host/spapr.h
> @@ -47,12 +47,6 @@ typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
>  
>  struct sPAPRPHBClass {
>      PCIHostBridgeClass parent_class;
> -
> -    void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
> -    int (*eeh_set_option)(sPAPRPHBState *sphb, unsigned int addr, int 
> option);
> -    int (*eeh_get_state)(sPAPRPHBState *sphb, int *state);
> -    int (*eeh_reset)(sPAPRPHBState *sphb, int option);
> -    int (*eeh_configure)(sPAPRPHBState *sphb);
>  };
>  
>  typedef struct spapr_pci_msi {
> @@ -91,12 +85,11 @@ struct sPAPRPHBState {
>      spapr_pci_msi_mig *msi_devs;
>  
>      QLIST_ENTRY(sPAPRPHBState) list;
> -};
>  
> -struct sPAPRPHBVFIOState {
> -    sPAPRPHBState phb;
> -
> -    int32_t iommugroupid;
> +    int32_t iommugroupid; /* obsolete */
> +    uint32_t dma32_window_start;
> +    uint32_t dma32_window_size;
> +    unsigned vfio_num;
>  };
>  
>  #define SPAPR_PCI_MAX_INDEX          255
> @@ -138,4 +131,12 @@ sPAPRPHBState *spapr_pci_find_phb(sPAPRMachineState 
> *spapr, uint64_t buid);
>  PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, uint64_t buid,
>                                uint32_t config_addr);
>  
> +int spapr_phb_vfio_dma_capabilities_update(sPAPRPHBState *sphb);
> +int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
> +                                  PCIDevice *pdev, int option);
> +int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state);
> +int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option);
> +int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb);
> +void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb);
> +
>  #endif /* __HW_SPAPR_PCI_H__ */

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: pgp3leSV1J29G.pgp
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]