[Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO

From:	Alex Williamson
Subject:	[Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO
Date:	Sun, 17 Dec 2017 22:02:53 -0700
User-agent:	StGit/0.18

With recently proposed kernel side vfio-pci changes, the MSI-X vector
table area can be mmap'd from userspace, allowing direct access to
non-MSI-X registers within the host page size of this area.  However,
we only get that direct access if QEMU isn't also emulating MSI-X
within that same page.  For x86/64 host, the system page size is 4K
and the PCI spec recommends a minimum of 4K to 8K alignment to
separate MSI-X from non-MSI-X registers, therefore only devices which
don't honor this recommendation would see any improvement from this
option.  The real targets for this feature are hosts where the page
size exceeds the PCI spec recommended alignment, such as ARM64 systems
with 64K pages.

This new x-msix-relocation option accepts the following options:

  off: Disable MSI-X relocation, use native device config (default)
  auto: Automaically relocate MSI-X MMIO to another BAR or offset
       based on minimum additional MMIO requirement
  bar0..bar5: Specify the target BAR, which will either be extended
       if the BAR exists or added if the BAR slot is available.

Signed-off-by: Alex Williamson <address@hidden>
---
 hw/vfio/pci.c        |  102 ++++++++++++++++++++++++++++++++++++++++++++++++++
 hw/vfio/pci.h        |    1 
 hw/vfio/trace-events |    2 +
 3 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index c383b842da20..b4426abf297a 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1352,6 +1352,101 @@ static void vfio_pci_fixup_msix_region(VFIOPCIDevice 
*vdev)
     }
 }
 
+static void vfio_pci_relocate_msix(VFIOPCIDevice *vdev)
+{
+    int target_bar = -1;
+    size_t msix_sz;
+
+    if (!vdev->msix || vdev->msix_relo == OFF_AUTOPCIBAR_OFF) {
+        return;
+    }
+
+    /* The actual minimum size of MSI-X structures */
+    msix_sz = (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE) +
+              (QEMU_ALIGN_UP(vdev->msix->entries, 64) / 8);
+    /* Round up to host pages, we don't want to share a page */
+    msix_sz = REAL_HOST_PAGE_ALIGN(msix_sz);
+    /* PCI BARs must be a power of 2 */
+    msix_sz = pow2ceil(msix_sz);
+
+    /* Auto: pick the BAR that incurs the least additional MMIO space */
+    if (vdev->msix_relo == OFF_AUTOPCIBAR_AUTO) {
+        int i;
+        size_t best = UINT64_MAX;
+
+        for (i = 0; i < PCI_ROM_SLOT; i++) {
+            size_t size;
+
+            if (vdev->bars[i].ioport) {
+                continue;
+            }
+
+            /* MSI-X MMIO must reside within first 32bit offset of BAR */
+            if (vdev->bars[i].size > (UINT32_MAX / 2))
+                continue;
+
+            /*
+             * Must be pow2, so larger of double existing or double msix_sz,
+             * or if BAR unimplemented, msix_sz
+             */
+            size = MAX(vdev->bars[i].size * 2,
+                       vdev->bars[i].size ? msix_sz * 2 : msix_sz);
+
+            trace_vfio_msix_relo_cost(vdev->vbasedev.name, i, size);
+
+            if (size < best) {
+                best = size;
+                target_bar = i;
+            }
+
+            if (vdev->bars[i].mem64) {
+              i++;
+            }
+        }
+    } else {
+        target_bar = (int)(vdev->msix_relo - OFF_AUTOPCIBAR_BAR0);
+    }
+
+    if (target_bar < 0 || vdev->bars[target_bar].ioport ||
+        (!vdev->bars[target_bar].size &&
+         target_bar > 0 && vdev->bars[target_bar - 1].mem64)) {
+        return; /* Go BOOM?  Plumb Error */
+    }
+
+    /*
+     * If adding a new BAR, test if we can make it 64bit.  We make it
+     * prefetchable since QEMU MSI-X emulation has no read side effects
+     * and doing so makes mapping more flexible.
+     */
+    if (!vdev->bars[target_bar].size) {
+        if (target_bar < (PCI_ROM_SLOT - 1) &&
+            !vdev->bars[target_bar + 1].size) {
+            vdev->bars[target_bar].mem64 = true;
+            vdev->bars[target_bar].type = PCI_BASE_ADDRESS_MEM_TYPE_64;
+        }
+        vdev->bars[target_bar].type |= PCI_BASE_ADDRESS_MEM_PREFETCH;
+        vdev->bars[target_bar].size = msix_sz;
+        vdev->msix->table_offset = 0;
+    } else {
+        vdev->bars[target_bar].size = MAX(vdev->bars[target_bar].size * 2,
+                                          msix_sz * 2);
+        /*
+         * Due to above size calc, MSI-X always starts halfway into the BAR,
+         * which will always be a separate host page.
+         */
+        vdev->msix->table_offset = vdev->bars[target_bar].size / 2;
+    }
+
+    vdev->msix->table_bar = target_bar;
+    vdev->msix->pba_bar = target_bar;
+    /* Requires 8-byte alignment, but PCI_MSIX_ENTRY_SIZE guarantees that */
+    vdev->msix->pba_offset = vdev->msix->table_offset +
+                                  (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE);
+
+    trace_vfio_msix_relo(vdev->vbasedev.name,
+                         vdev->msix->table_bar, vdev->msix->table_offset);
+}
+
 /*
  * We don't have any control over how pci_add_capability() inserts
  * capabilities into the chain.  In order to setup MSI-X we need a
@@ -1430,6 +1525,8 @@ static void vfio_msix_early_setup(VFIOPCIDevice *vdev, 
Error **errp)
     vdev->msix = msix;
 
     vfio_pci_fixup_msix_region(vdev);
+
+    vfio_pci_relocate_msix(vdev);
 }
 
 static int vfio_msix_setup(VFIOPCIDevice *vdev, int pos, Error **errp)
@@ -2845,13 +2942,14 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 
     vfio_pci_size_rom(vdev);
 
+    vfio_bars_prepare(vdev);
+
     vfio_msix_early_setup(vdev, &err);
     if (err) {
         error_propagate(errp, err);
         goto error;
     }
 
-    vfio_bars_prepare(vdev);
     vfio_bars_register(vdev);
 
     ret = vfio_add_capabilities(vdev, errp);
@@ -3041,6 +3139,8 @@ static Property vfio_pci_dev_properties[] = {
     DEFINE_PROP_UNSIGNED_NODEFAULT("x-nv-gpudirect-clique", VFIOPCIDevice,
                                    nv_gpudirect_clique,
                                    qdev_prop_nv_gpudirect_clique, uint8_t),
+    DEFINE_PROP_OFF_AUTO_PCIBAR("x-msix-relocation", VFIOPCIDevice, msix_relo,
+                                OFF_AUTOPCIBAR_OFF),
     /*
      * TODO - support passed fds... is this necessary?
      * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index dcdb1a806769..588381f201b4 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -135,6 +135,7 @@ typedef struct VFIOPCIDevice {
                                 (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
     int32_t bootindex;
     uint32_t igd_gms;
+    OffAutoPCIBAR msix_relo;
     uint8_t pm_cap;
     uint8_t nv_gpudirect_clique;
     bool pci_aer;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index fae096c0724f..437ccdd29053 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -16,6 +16,8 @@ vfio_msix_pba_disable(const char *name) " (%s)"
 vfio_msix_pba_enable(const char *name) " (%s)"
 vfio_msix_disable(const char *name) " (%s)"
 vfio_msix_fixup(const char *name, int bar, uint64_t start, uint64_t end) " 
(%s) MSI-X region %d mmap fixup [0x%"PRIx64" - 0x%"PRIx64"]"
+vfio_msix_relo_cost(const char *name, int bar, uint64_t cost) " (%s) BAR %d 
cost 0x%"PRIx64""
+vfio_msix_relo(const char *name, int bar, uint64_t offset) " (%s) BAR %d 
offset 0x%"PRIx64""
 vfio_msi_enable(const char *name, int nr_vectors) " (%s) Enabled %d MSI 
vectors"
 vfio_msi_disable(const char *name) " (%s)"
 vfio_pci_load_rom(const char *name, unsigned long size, unsigned long offset, 
unsigned long flags) "Device %s ROM:\n  size: 0x%lx, offset: 0x%lx, flags: 
0x%lx"

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-devel] [RFC/RFT PATCH 0/5] vfio/pci: MSI-X MMIO relocation, Alex Williamson, 2017/12/18
- [Qemu-devel] [RFC/RFT PATCH 1/5] vfio/pci: Fixup VFIOMSIXInfo comment, Alex Williamson, 2017/12/18
- [Qemu-devel] [RFC/RFT PATCH 2/5] vfio/pci: Add base BAR MemoryRegion, Alex Williamson, 2017/12/18
  - Re: [Qemu-devel] [RFC/RFT PATCH 2/5] vfio/pci: Add base BAR MemoryRegion, Alexey Kardashevskiy, 2017/12/18
    - Re: [Qemu-devel] [RFC/RFT PATCH 2/5] vfio/pci: Add base BAR MemoryRegion, Alex Williamson, 2017/12/18
    - Re: [Qemu-devel] [RFC/RFT PATCH 2/5] vfio/pci: Add base BAR MemoryRegion, Alexey Kardashevskiy, 2017/12/18
- [Qemu-devel] [RFC/RFT PATCH 3/5] vfio/pci: Emulate BARs, Alex Williamson, 2017/12/18
- [Qemu-devel] [RFC/RFT PATCH 4/5] qapi: Create DEFINE_PROP_OFF_AUTO_PCIBAR, Alex Williamson, 2017/12/18
- [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alex Williamson <=
  - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alexey Kardashevskiy, 2017/12/18
    - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alex Williamson, 2017/12/18
    - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alexey Kardashevskiy, 2017/12/18
    - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alex Williamson, 2017/12/18
    - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alexey Kardashevskiy, 2017/12/18
  - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alexey Kardashevskiy, 2017/12/18
    - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alex Williamson, 2017/12/18
    - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alexey Kardashevskiy, 2017/12/19
    - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alex Williamson, 2017/12/19
    - Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO, Alexey Kardashevskiy, 2017/12/19

Prev by Date: [Qemu-devel] [RFC/RFT PATCH 4/5] qapi: Create DEFINE_PROP_OFF_AUTO_PCIBAR
Next by Date: Re: [Qemu-devel] [RFC v5 15/26] monitor: let suspend_cnt be thread safe
Previous by thread: [Qemu-devel] [RFC/RFT PATCH 4/5] qapi: Create DEFINE_PROP_OFF_AUTO_PCIBAR
Next by thread: Re: [Qemu-devel] [RFC/RFT PATCH 5/5] vfio/pci: Allow relocating MSI-X MMIO
Index(es):
- Date
- Thread