qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] msix: Support specifying offsets, BARs, and capabil


From: Alex Williamson
Subject: [Qemu-devel] [PATCH] msix: Support specifying offsets, BARs, and capability location
Date: Tue, 12 Jun 2012 14:03:26 -0600
User-agent: StGIT/0.14.3

msix_init has very little configurability as to how it lays out MSIX
for a device.  It claims to resize BARs, but doesn't actually do this
anymore.  This patch allows MSIX to be fully specified, which is
necessary both for emulated devices trying to match the physical
layout of a hardware device as well as for any kind of device
assignment.

New functions msix_init_bar & msix_uninit_bar provide wrappers around
the more detailed functions for drivers that just want a simple MSIX
setup.

Signed-off-by: Alex Williamson <address@hidden>
---

 hw/ivshmem.c    |    9 +-
 hw/msix.c       |  299 +++++++++++++++++++++++++++++++------------------------
 hw/msix.h       |   11 +-
 hw/pci.h        |   12 ++
 hw/virtio-pci.c |   15 +--
 5 files changed, 192 insertions(+), 154 deletions(-)

diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 05559b6..71c84a6 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -563,16 +563,13 @@ static uint64_t ivshmem_get_size(IVShmemState * s) {
 
 static void ivshmem_setup_msi(IVShmemState * s)
 {
-    memory_region_init(&s->msix_bar, "ivshmem-msix", 4096);
-    if (!msix_init(&s->dev, s->vectors, &s->msix_bar, 1, 0)) {
-        pci_register_bar(&s->dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
-                         &s->msix_bar);
-        IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
-    } else {
+    if (msix_init_bar(&s->dev, s->vectors, &s->msix_bar, 1, "ivshmem-msix")) {
         IVSHMEM_DPRINTF("msix initialization failed\n");
         exit(1);
     }
 
+    IVSHMEM_DPRINTF("msix initialized (%d vectors)\n", s->vectors);
+
     /* allocate QEMU char devices for receiving interrupts */
     s->eventfd_table = g_malloc0(s->vectors * sizeof(EventfdEntry));
 
diff --git a/hw/msix.c b/hw/msix.c
index b64f109..ee70022 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -27,17 +27,9 @@
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
 #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
 
-/* How much space does an MSIX table need. */
-/* The spec requires giving the table structure
- * a 4K aligned region all by itself. */
-#define MSIX_PAGE_SIZE 0x1000
-/* Reserve second half of the page for pending bits */
-#define MSIX_PAGE_PENDING (MSIX_PAGE_SIZE / 2)
-#define MSIX_MAX_ENTRIES 32
-
 static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
 {
-    uint8_t *table_entry = dev->msix_table_page + vector * PCI_MSIX_ENTRY_SIZE;
+    uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
     MSIMessage msg;
 
     msg.address = pci_get_quad(table_entry + PCI_MSIX_ENTRY_LOWER_ADDR);
@@ -45,57 +37,6 @@ static MSIMessage msix_get_message(PCIDevice *dev, unsigned 
vector)
     return msg;
 }
 
-/* Add MSI-X capability to the config space for the device. */
-/* Given a bar and its size, add MSI-X table on top of it
- * and fill MSI-X capability in the config space.
- * Original bar size must be a power of 2 or 0.
- * New bar size is returned. */
-static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
-                           unsigned bar_nr, unsigned bar_size)
-{
-    int config_offset;
-    uint8_t *config;
-
-    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1)
-        return -EINVAL;
-    if (bar_size > 0x80000000)
-        return -ENOSPC;
-
-    /* Require aligned offset for MSI-X structures */
-    if (bar_size & ~(MSIX_PAGE_SIZE - 1)) {
-        return -EINVAL;
-    }
-
-    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
-                                       0, MSIX_CAP_LENGTH);
-    if (config_offset < 0)
-        return config_offset;
-    config = pdev->config + config_offset;
-
-    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
-    /* Table on top of BAR */
-    pci_set_long(config + PCI_MSIX_TABLE, bar_size | bar_nr);
-    /* Pending bits on top of that */
-    pci_set_long(config + PCI_MSIX_PBA, (bar_size + MSIX_PAGE_PENDING) |
-                 bar_nr);
-    pdev->msix_cap = config_offset;
-    /* Make flags bit writable. */
-    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
-           MSIX_MASKALL_MASK;
-    pdev->msix_function_masked = true;
-    return 0;
-}
-
-static uint64_t msix_mmio_read(void *opaque, target_phys_addr_t addr,
-                               unsigned size)
-{
-    PCIDevice *dev = opaque;
-    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
-    void *page = dev->msix_table_page;
-
-    return pci_get_long(page + offset);
-}
-
 static uint8_t msix_pending_mask(int vector)
 {
     return 1 << (vector % 8);
@@ -103,7 +44,7 @@ static uint8_t msix_pending_mask(int vector)
 
 static uint8_t *msix_pending_byte(PCIDevice *dev, int vector)
 {
-    return dev->msix_table_page + MSIX_PAGE_PENDING + vector / 8;
+    return dev->msix_pba + vector / 8;
 }
 
 static int msix_is_pending(PCIDevice *dev, int vector)
@@ -124,7 +65,7 @@ static void msix_clr_pending(PCIDevice *dev, int vector)
 static bool msix_vector_masked(PCIDevice *dev, int vector, bool fmask)
 {
     unsigned offset = vector * PCI_MSIX_ENTRY_SIZE + 
PCI_MSIX_ENTRY_VECTOR_CTRL;
-    return fmask || dev->msix_table_page[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
+    return fmask || dev->msix_table[offset] & PCI_MSIX_ENTRY_CTRL_MASKBIT;
 }
 
 static bool msix_is_masked(PCIDevice *dev, int vector)
@@ -203,27 +144,29 @@ void msix_write_config(PCIDevice *dev, uint32_t addr,
     }
 }
 
-static void msix_mmio_write(void *opaque, target_phys_addr_t addr,
-                            uint64_t val, unsigned size)
+static uint64_t msix_table_mmio_read(void *opaque, target_phys_addr_t addr,
+                                     unsigned size)
 {
     PCIDevice *dev = opaque;
-    unsigned int offset = addr & (MSIX_PAGE_SIZE - 1) & ~0x3;
-    int vector = offset / PCI_MSIX_ENTRY_SIZE;
-    bool was_masked;
 
-    /* MSI-X page includes a read-only PBA and a writeable Vector Control. */
-    if (vector >= dev->msix_entries_nr) {
-        return;
-    }
+    return pci_get_long(dev->msix_table + addr);
+}
+
+static void msix_table_mmio_write(void *opaque, target_phys_addr_t addr,
+                                  uint64_t val, unsigned size)
+{
+    PCIDevice *dev = opaque;
+    int vector = addr / PCI_MSIX_ENTRY_SIZE;
+    bool was_masked;
 
     was_masked = msix_is_masked(dev, vector);
-    pci_set_long(dev->msix_table_page + offset, val);
+    pci_set_long(dev->msix_table + addr, val);
     msix_handle_mask_update(dev, vector, was_masked);
 }
 
-static const MemoryRegionOps msix_mmio_ops = {
-    .read = msix_mmio_read,
-    .write = msix_mmio_write,
+static const MemoryRegionOps msix_table_mmio_ops = {
+    .read = msix_table_mmio_read,
+    .write = msix_table_mmio_write,
     .endianness = DEVICE_NATIVE_ENDIAN,
     .valid = {
         .min_access_size = 4,
@@ -231,17 +174,23 @@ static const MemoryRegionOps msix_mmio_ops = {
     },
 };
 
-static void msix_mmio_setup(PCIDevice *d, MemoryRegion *bar)
+static uint64_t msix_pba_mmio_read(void *opaque, target_phys_addr_t addr,
+                                   unsigned size)
 {
-    uint8_t *config = d->config + d->msix_cap;
-    uint32_t table = pci_get_long(config + PCI_MSIX_TABLE);
-    uint32_t offset = table & ~(MSIX_PAGE_SIZE - 1);
-    /* TODO: for assigned devices, we'll want to make it possible to map
-     * pending bits separately in case they are in a separate bar. */
+    PCIDevice *dev = opaque;
 
-    memory_region_add_subregion(bar, offset, &d->msix_mmio);
+    return pci_get_long(dev->msix_pba + addr);
 }
 
+static const MemoryRegionOps msix_pba_mmio_ops = {
+    .read = msix_pba_mmio_read,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    },
+};
+
 static void msix_mask_all(struct PCIDevice *dev, unsigned nentries)
 {
     int vector;
@@ -251,52 +200,154 @@ static void msix_mask_all(struct PCIDevice *dev, 
unsigned nentries)
             vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;
         bool was_masked = msix_is_masked(dev, vector);
 
-        dev->msix_table_page[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
+        dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
         msix_handle_mask_update(dev, vector, was_masked);
     }
 }
 
-/* Initialize the MSI-X structures. Note: if MSI-X is supported, BAR size is
- * modified, it should be retrieved with msix_bar_size. */
+/* Add MSI-X capability to the config space for the device. */
+static int msix_add_config(struct PCIDevice *pdev, unsigned short nentries,
+                           uint8_t table_bar, unsigned table_offset,
+                           uint8_t pba_bar, unsigned pba_offset, uint8_t pos)
+{
+    int config_offset;
+    uint8_t *config;
+
+    if (nentries < 1 || nentries > PCI_MSIX_FLAGS_QSIZE + 1) {
+        return -EINVAL;
+    }
+
+    config_offset = pci_add_capability(pdev, PCI_CAP_ID_MSIX,
+                                       pos, MSIX_CAP_LENGTH);
+    if (config_offset < 0) {
+        return config_offset;
+    }
+
+    config = pdev->config + config_offset;
+
+    pci_set_word(config + PCI_MSIX_FLAGS, nentries - 1);
+    pci_set_long(config + PCI_MSIX_TABLE, table_offset | table_bar);
+    pci_set_long(config + PCI_MSIX_PBA, pba_offset | pba_bar);
+
+    pdev->msix_cap = config_offset;
+
+    /* Make flags bit writable. */
+    pdev->wmask[config_offset + MSIX_CONTROL_OFFSET] |= MSIX_ENABLE_MASK |
+                                                        MSIX_MASKALL_MASK;
+
+    pdev->msix_function_masked = true;
+
+    return 0;
+}
+
+/* Clean up resources for the device. */
+void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion 
*pba_bar)
+{
+    if (!(dev->cap_present & QEMU_PCI_CAP_MSIX)) {
+        return;
+    }
+
+    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
+    dev->msix_cap = 0;
+    dev->msix_entries_nr = 0;
+
+    memory_region_del_subregion(pba_bar, &dev->msix_pba_mmio);
+    memory_region_destroy(&dev->msix_pba_mmio);
+    g_free(dev->msix_pba);
+    dev->msix_pba = NULL;
+
+    memory_region_del_subregion(table_bar, &dev->msix_table_mmio);
+    memory_region_destroy(&dev->msix_table_mmio);
+    g_free(dev->msix_table);
+    dev->msix_table = NULL;
+
+    g_free(dev->msix_entry_used);
+    dev->msix_entry_used = NULL;
+    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
+}
+
+/* Initialize the MSI-X structures */
 int msix_init(struct PCIDevice *dev, unsigned short nentries,
-              MemoryRegion *bar,
-              unsigned bar_nr, unsigned bar_size)
+              MemoryRegion *table_bar, uint8_t table_bar_nr,
+              unsigned table_offset, MemoryRegion *pba_bar,
+              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos)
 {
     int ret;
+    unsigned table_size, pba_size;
 
     /* Nothing to do if MSI is not supported by interrupt controller */
     if (!msi_supported) {
         return -ENOTSUP;
     }
-    if (nentries > MSIX_MAX_ENTRIES)
+
+    table_size = nentries * PCI_MSIX_ENTRY_SIZE;
+    pba_size = QEMU_ALIGN_UP(nentries, 64) / 8;
+
+    /* Sanity test, vector table & pba don't overlap and fit within BARs */
+    if ((table_bar_nr == pba_bar_nr &&
+         ranges_overlap(table_offset, table_size, pba_offset, pba_size)) ||
+        table_offset + table_size > memory_region_size(table_bar) ||
+        pba_offset + pba_size > memory_region_size(pba_bar)) {
         return -EINVAL;
+    }
 
-    dev->msix_entry_used = g_malloc0(MSIX_MAX_ENTRIES *
-                                        sizeof *dev->msix_entry_used);
+    dev->msix_table = g_malloc0(table_size);
+    dev->msix_pba = g_malloc0(pba_size);
+    dev->msix_entry_used = g_malloc0(nentries * sizeof *dev->msix_entry_used);
+    dev->msix_entries_nr = nentries;
+    dev->cap_present |= QEMU_PCI_CAP_MSIX;
 
-    dev->msix_table_page = g_malloc0(MSIX_PAGE_SIZE);
     msix_mask_all(dev, nentries);
 
-    memory_region_init_io(&dev->msix_mmio, &msix_mmio_ops, dev,
-                          "msix", MSIX_PAGE_SIZE);
+    memory_region_init_io(&dev->msix_table_mmio, &msix_table_mmio_ops, dev,
+                          "msix", table_size);
+    memory_region_add_subregion(table_bar, table_offset, 
&dev->msix_table_mmio);
 
-    dev->msix_entries_nr = nentries;
-    ret = msix_add_config(dev, nentries, bar_nr, bar_size);
-    if (ret)
-        goto err_config;
+    memory_region_init_io(&dev->msix_pba_mmio, &msix_pba_mmio_ops, dev,
+                          "msix-pba", pba_size);
+    memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
+
+    ret = msix_add_config(dev, nentries, table_bar_nr, table_offset,
+                          pba_bar_nr, pba_offset, cap_pos);
+    if (ret) {
+        msix_uninit(dev, table_bar, pba_bar);
+        return ret;
+    }
 
-    dev->cap_present |= QEMU_PCI_CAP_MSIX;
-    msix_mmio_setup(dev, bar);
     return 0;
+}
 
-err_config:
-    dev->msix_entries_nr = 0;
-    memory_region_destroy(&dev->msix_mmio);
-    g_free(dev->msix_table_page);
-    dev->msix_table_page = NULL;
-    g_free(dev->msix_entry_used);
-    dev->msix_entry_used = NULL;
-    return ret;
+int msix_init_bar(PCIDevice *pdev, unsigned short nentries,
+                  MemoryRegion *bar, uint8_t bar_nr, const char *name)
+{
+    int ret;
+
+    /*
+     * Migration compatibility dictates that this remains a 4k
+     * BAR with the vector table in the lower half and PBA in
+     * the upper half.
+     */
+    if (nentries * PCI_MSIX_ENTRY_SIZE > 2048) {
+        return -EINVAL;
+    }
+
+    memory_region_init(bar, name, 4096);
+
+    ret = msix_init(pdev, nentries, bar, bar_nr, 0, bar, bar_nr, 2048, 0);
+    if (ret) {
+        memory_region_destroy(bar);
+        return ret;
+    }
+
+    pci_register_bar(pdev, bar_nr, PCI_BASE_ADDRESS_SPACE_MEMORY, bar);
+
+    return 0;
+}
+
+void msix_uninit_bar(PCIDevice *pdev, MemoryRegion *bar)
+{
+    msix_uninit(pdev, bar, bar);
+    memory_region_destroy(bar);
 }
 
 static void msix_free_irq_entries(PCIDevice *dev)
@@ -309,26 +360,6 @@ static void msix_free_irq_entries(PCIDevice *dev)
     }
 }
 
-/* Clean up resources for the device. */
-int msix_uninit(PCIDevice *dev, MemoryRegion *bar)
-{
-    if (!msix_present(dev)) {
-        return 0;
-    }
-    pci_del_capability(dev, PCI_CAP_ID_MSIX, MSIX_CAP_LENGTH);
-    dev->msix_cap = 0;
-    msix_free_irq_entries(dev);
-    dev->msix_entries_nr = 0;
-    memory_region_del_subregion(bar, &dev->msix_mmio);
-    memory_region_destroy(&dev->msix_mmio);
-    g_free(dev->msix_table_page);
-    dev->msix_table_page = NULL;
-    g_free(dev->msix_entry_used);
-    dev->msix_entry_used = NULL;
-    dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
-    return 0;
-}
-
 void msix_save(PCIDevice *dev, QEMUFile *f)
 {
     unsigned n = dev->msix_entries_nr;
@@ -337,8 +368,8 @@ void msix_save(PCIDevice *dev, QEMUFile *f)
         return;
     }
 
-    qemu_put_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
-    qemu_put_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+    qemu_put_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
+    qemu_put_buffer(f, dev->msix_pba, (n + 7) / 8);
 }
 
 /* Should be called after restoring the config space. */
@@ -352,8 +383,8 @@ void msix_load(PCIDevice *dev, QEMUFile *f)
     }
 
     msix_free_irq_entries(dev);
-    qemu_get_buffer(f, dev->msix_table_page, n * PCI_MSIX_ENTRY_SIZE);
-    qemu_get_buffer(f, dev->msix_table_page + MSIX_PAGE_PENDING, (n + 7) / 8);
+    qemu_get_buffer(f, dev->msix_table, n * PCI_MSIX_ENTRY_SIZE);
+    qemu_get_buffer(f, dev->msix_pba, (n + 7) / 8);
     msix_update_function_masked(dev);
 
     for (vector = 0; vector < n; vector++) {
@@ -397,10 +428,14 @@ void msix_reset(PCIDevice *dev)
     if (!msix_present(dev)) {
         return;
     }
+
     msix_free_irq_entries(dev);
+
     dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] &=
            ~dev->wmask[dev->msix_cap + MSIX_CONTROL_OFFSET];
-    memset(dev->msix_table_page, 0, MSIX_PAGE_SIZE);
+
+    memset(dev->msix_table, 0, dev->msix_entries_nr * PCI_MSIX_ENTRY_SIZE);
+    memset(dev->msix_pba, 0, QEMU_ALIGN_UP(dev->msix_entries_nr, 64) / 8);
     msix_mask_all(dev, dev->msix_entries_nr);
 }
 
diff --git a/hw/msix.h b/hw/msix.h
index e5a488d..54ea2a7 100644
--- a/hw/msix.h
+++ b/hw/msix.h
@@ -5,13 +5,18 @@
 #include "pci.h"
 
 int msix_init(PCIDevice *pdev, unsigned short nentries,
-              MemoryRegion *bar,
-              unsigned bar_nr, unsigned bar_size);
+              MemoryRegion *table_bar, uint8_t table_bar_nr,
+              unsigned table_offset, MemoryRegion *pba_bar,
+              uint8_t pba_bar_nr, unsigned pba_offset, uint8_t cap_pos);
+int msix_init_bar(PCIDevice *pdev, unsigned short nentries,
+                  MemoryRegion *bar, uint8_t bar_nr, const char *name);
 
 void msix_write_config(PCIDevice *pci_dev, uint32_t address,
                        uint32_t val, int len);
 
-int msix_uninit(PCIDevice *d, MemoryRegion *bar);
+void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar,
+                 MemoryRegion *pba_bar);
+void msix_uninit_bar(PCIDevice *dev, MemoryRegion *bar);
 
 unsigned int msix_nr_vectors_allocated(const PCIDevice *dev);
 
diff --git a/hw/pci.h b/hw/pci.h
index 4c96268..75b80f2 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -224,14 +224,20 @@ struct PCIDevice {
     /* MSI-X entries */
     int msix_entries_nr;
 
-    /* Space to store MSIX table */
-    uint8_t *msix_table_page;
+    /* Space to store MSIX table & pending bits array */
+    uint8_t *msix_table;
+    uint8_t *msix_pba;
+
     /* MMIO index used to map MSIX table and pending bit entries. */
-    MemoryRegion msix_mmio;
+    MemoryRegion msix_table_mmio;
+    MemoryRegion msix_pba_mmio;
+
     /* Reference-count for entries actually in use by driver. */
     unsigned *msix_entry_used;
+
     /* MSIX function mask set or MSIX disabled */
     bool msix_function_masked;
+
     /* Version id needed for VMState */
     int32_t version_id;
 
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index 9342eed..da0d022 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -782,13 +782,10 @@ void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice 
*vdev)
     pci_set_word(config + PCI_SUBSYSTEM_ID, vdev->device_id);
     config[PCI_INTERRUPT_PIN] = 1;
 
-    memory_region_init(&proxy->msix_bar, "virtio-msix", 4096);
-    if (vdev->nvectors && !msix_init(&proxy->pci_dev, vdev->nvectors,
-                                     &proxy->msix_bar, 1, 0)) {
-        pci_register_bar(&proxy->pci_dev, 1, PCI_BASE_ADDRESS_SPACE_MEMORY,
-                         &proxy->msix_bar);
-    } else
+    if (vdev->nvectors && msix_init_bar(&proxy->pci_dev, vdev->nvectors,
+                                        &proxy->msix_bar, 1, "virtio-msix")) {
         vdev->nvectors = 0;
+    }
 
     proxy->pci_dev.config_write = virtio_write_config;
 
@@ -834,12 +831,10 @@ static int virtio_blk_init_pci(PCIDevice *pci_dev)
 static int virtio_exit_pci(PCIDevice *pci_dev)
 {
     VirtIOPCIProxy *proxy = DO_UPCAST(VirtIOPCIProxy, pci_dev, pci_dev);
-    int r;
 
     memory_region_destroy(&proxy->bar);
-    r = msix_uninit(pci_dev, &proxy->msix_bar);
-    memory_region_destroy(&proxy->msix_bar);
-    return r;
+    msix_uninit_bar(pci_dev, &proxy->msix_bar);
+    return 0;
 }
 
 static int virtio_blk_exit_pci(PCIDevice *pci_dev)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]