qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created i


From: Igor Mammedov
Subject: Re: [Qemu-devel] [PATCH V3] virtio: do not require IOMMU to be created in advance
Date: Thu, 9 Mar 2017 10:28:49 +0100

On Thu, 9 Mar 2017 10:32:44 +0800
Jason Wang <address@hidden> wrote:

> On 2017年03月09日 00:40, Igor Mammedov wrote:
> > On Tue, 7 Mar 2017 14:47:30 +0200
> > Marcel Apfelbaum<address@hidden>  wrote:
> >  
> >> On 03/07/2017 11:09 AM, Jason Wang wrote:  
> >>> After commit 96a8821d2141 ("virtio: unbreak virtio-pci with IOMMU
> >>> after caching ring translations"), IOMMU was required to be created in
> >>> advance. This is because we can only get the correct dma_as after pci
> >>> IOMMU (e.g intel_iommu) was initialized. This is suboptimal and
> >>> inconvenient for user. This patch releases this by:
> >>>
> >>> - introduce a bus_master_ready method for PCIDeviceClass and trigger
> >>>    this during pci_init_bus_master
> >>> - implement virtio-pci method and 1) reset the dma_as 2) re-register
> >>>    the memory listener to the new dma_as

Instead of trying to fix up later it's possible to refuse
adding iommu device if other devices has been added before
it with -device/device_add.
That would match current CLI semantics where device that
others depend on should be listed on CLI before that others
are listed.

A bit hackish but something along this lines:

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 9349acb..9d8ecc6 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -460,7 +460,7 @@ void pci_device_deassert_intx(PCIDevice *dev);
 typedef AddressSpace *(*PCIIOMMUFunc)(PCIBus *, void *, int);
 
 AddressSpace *pci_device_iommu_address_space(PCIDevice *dev);
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque);
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque, Error **errp);
 
 static inline void
 pci_set_byte(uint8_t *config, uint8_t val)
diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index e0732cc..9d9a76b 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1160,7 +1160,7 @@ static void amdvi_realize(DeviceState *dev, Error **err)
 
     sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio);
     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR);
-    pci_setup_iommu(bus, amdvi_host_dma_iommu, s);
+    pci_setup_iommu(bus, amdvi_host_dma_iommu, s, err);
     s->devid = object_property_get_int(OBJECT(&s->pci), "addr", err);
     msi_init(&s->pci.dev, 0, 1, true, false, err);
     amdvi_init(s);
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 22d8226..f4f208c 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -2585,7 +2585,7 @@ static void vtd_realize(DeviceState *dev, Error **errp)
                                               g_free, g_free);
     vtd_init(s);
     sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, Q35_HOST_BRIDGE_IOMMU_ADDR);
-    pci_setup_iommu(bus, vtd_host_dma_iommu, dev);
+    pci_setup_iommu(bus, vtd_host_dma_iommu, dev, errp);
     /* Pseudo address space under root PCI bus. */
     pcms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
 }
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 273f1e4..2d01589 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -317,6 +317,21 @@ static void pci_host_bus_register(DeviceState *host)
     QLIST_INSERT_HEAD(&pci_host_bridges, host_bridge, next);
 }
 
+static bool pcibus_has_devices(PCIBus *bus)
+{
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
+        if (bus->devices[i]) {
+            DeviceState *dev = DEVICE(bus->devices[i]);
+            if (dev->opts) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 PCIBus *pci_find_primary_bus(void)
 {
     PCIBus *primary_bus = NULL;
@@ -2534,8 +2549,12 @@ AddressSpace *pci_device_iommu_address_space(PCIDevice 
*dev)
     return &address_space_memory;
 }
 
-void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque)
+void pci_setup_iommu(PCIBus *bus, PCIIOMMUFunc fn, void *opaque, Error **errp)
 {
+    if (pcibus_has_devices(bus)) {
+        error_setg(errp, "IOMMU must be created before any other PCI devices");
+        return;
+    }
     bus->iommu_fn = fn;
     bus->iommu_opaque = opaque;
 }



> >>>     
> >> Hi Jason,
> >>  
> >>> Cc: Paolo Bonzini<address@hidden>
> >>> Signed-off-by: Jason Wang<address@hidden>
> >>> ---
> >>> Changes from V2:
> >>> - delay pci_init_bus_master() after pci device is realized to make
> >>>    bus_master_ready a more generic method
> >>> ---
> >>>   hw/pci/pci.c               | 11 ++++++++---
> >>>   hw/virtio/virtio-pci.c     |  9 +++++++++
> >>>   hw/virtio/virtio.c         | 19 +++++++++++++++++++
> >>>   include/hw/pci/pci.h       |  1 +
> >>>   include/hw/virtio/virtio.h |  1 +
> >>>   5 files changed, 38 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> >>> index 273f1e4..22e6ad9 100644
> >>> --- a/hw/pci/pci.c
> >>> +++ b/hw/pci/pci.c
> >>> @@ -83,6 +83,7 @@ static const VMStateDescription vmstate_pcibus = {
> >>>   static void pci_init_bus_master(PCIDevice *pci_dev)
> >>>   {
> >>>       AddressSpace *dma_as = pci_device_iommu_address_space(pci_dev);
> >>> +    PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev);
> >>>
> >>>       memory_region_init_alias(&pci_dev->bus_master_enable_region,
> >>>                                OBJECT(pci_dev), "bus master",
> >>> @@ -90,6 +91,9 @@ static void pci_init_bus_master(PCIDevice *pci_dev)
> >>>       memory_region_set_enabled(&pci_dev->bus_master_enable_region, 
> >>> false);
> >>>       address_space_init(&pci_dev->bus_master_as,
> >>>                          &pci_dev->bus_master_enable_region, 
> >>> pci_dev->name);
> >>> +    if (pc->bus_master_ready) {
> >>> +        pc->bus_master_ready(pci_dev);
> >>> +    }
> >>>   }
> >>>
> >>>   static void pcibus_machine_done(Notifier *notifier, void *data)
> >>> @@ -995,9 +999,6 @@ static PCIDevice *do_pci_register_device(PCIDevice 
> >>> *pci_dev, PCIBus *bus,
> >>>       pci_dev->devfn = devfn;
> >>>       pci_dev->requester_id_cache = pci_req_id_cache_get(pci_dev);
> >>>
> >>> -    if (qdev_hotplug) {
> >>> -        pci_init_bus_master(pci_dev);
> >>> -    }
> >>>       pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
> >>>       pci_dev->irq_state = 0;
> >>>       pci_config_alloc(pci_dev);
> >>> @@ -1995,6 +1996,10 @@ static void pci_qdev_realize(DeviceState *qdev, 
> >>> Error **errp)
> >>>           }
> >>>       }
> >>>
> >>> +    if (qdev_hotplug) {
> >>> +        pci_init_bus_master(pci_dev);
> >>> +    }
> >>> +  
> >> How does it help if we move qdev_hotplug check outside 
> >> "do_pci_register_device"?
> >> I suppose you want the code to run after the "realize" function?
> >> If so, what happens if a "realize" function of another device needs the 
> >> bus_master_as
> >> (at hotplug time)?  
> > Conceptually,
> > I'm not sure that inherited device class realize
> > should be aware of uninitialized bus_master,
> > which belongs to PCI device, nor should it initialize
> > it by calling pci_init_bus_master() explicitly,
> > it's parent's class job (PCIDevice).  
> 
> Yes, I was trying to propose a workaround for 2.9. I'm sure we will 
> refine the ordering in 2.10. And consider we have asked libvirt to 
> create IOMMU first, I think I will withdraw the patch.
> 
> >
> > more close to current code:
> > if xen-pci-passthrough were hotplugged then it looks like
> > this hunk could break it:
> > hw/xen/xen_pt.c:
> >   memory_listener_register(&s->memory_listener, &s->dev.bus_master_as);
> > would happen with uninitialized bus_master_as
> > as realize is called before pci_init_bus_master();  
> 
> Yes, this won't work. This is exactly the same issue as virtio, but this 
> will also break if it was created before an IOMMU.
> 
> >
> > So the same question as Marcel's but other way around,
> > why this hunk "has to" be moved.
> >
> >  
> 
> Right.
> 
> Thanks




reply via email to

[Prev in Thread] Current Thread [Next in Thread]