qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [patch v6 11/12] vfio: register aer resume notification han


From: Cao jin
Subject: [Qemu-devel] [patch v6 11/12] vfio: register aer resume notification handler for aer resume
Date: Tue, 5 Apr 2016 19:42:02 +0800

From: Chen Fan <address@hidden>

for supporting aer recovery, host and guest would run the same aer
recovery code, that would do the secondary bus reset if the error
is fatal, the aer recovery process:
  1. error_detected
  2. reset_link (if fatal)
  3. slot_reset/mmio_enabled
  4. resume

it indicates that host will do secondary bus reset to reset
the physical devices under bus in step 2, that would cause
devices in D3 status in a short time. but in qemu, we register
an error detected handler, that would be invoked as host broadcasts
the error-detected event in step 1, in order to avoid guest do
reset_link when host do reset_link simultaneously. it may cause
fatal error. we introduce a resmue notifier to assure host reset
completely. then do guest aer injection.

Signed-off-by: Chen Fan <address@hidden>
---
 hw/vfio/pci.c              | 157 +++++++++++++++++++++++++++++++++++----------
 hw/vfio/pci.h              |   2 +
 linux-headers/linux/vfio.h |   1 +
 3 files changed, 126 insertions(+), 34 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 691ff5e..d79fb3d 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2610,12 +2610,7 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
-    PCIDevice *dev = &vdev->pdev;
     Error *local_err = NULL;
-    PCIEAERMsg msg = {
-        .severity = 0,
-        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
-    };
 
     if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
         return;
@@ -2636,35 +2631,7 @@ static void vfio_err_notifier_handler(void *opaque)
         goto stop;
     }
 
-    /*
-     * we should read the error details from the real hardware
-     * configuration spaces, here we only need to do is signaling
-     * to guest an uncorrectable error has occurred.
-     */
-    if (dev->exp.aer_cap) {
-        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
-        uint32_t uncor_status;
-        bool isfatal;
-
-        uncor_status = vfio_pci_read_config(dev,
-                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
-
-        /*
-         * if the error is not emitted by this device, we can
-         * just ignore it.
-         */
-        if (!(uncor_status & ~0UL)) {
-            return;
-        }
-
-        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
-
-        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
-                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
-
-        pcie_aer_msg(dev, &msg);
-        return;
-    }
+    return;
 
 stop:
     /*
@@ -2757,6 +2724,126 @@ static void vfio_unregister_err_notifier(VFIOPCIDevice 
*vdev)
     event_notifier_cleanup(&vdev->err_notifier);
 }
 
+static void vfio_resume_notifier_handler(void *opaque)
+{
+    VFIOPCIDevice *vdev = opaque;
+    PCIDevice *dev = &vdev->pdev;
+    PCIEAERMsg msg = {
+        .severity = 0,
+        .source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+    };
+
+    if (!event_notifier_test_and_clear(&vdev->resume_notifier)) {
+        return;
+    }
+
+    /*
+     * we should read the error details from the real hardware
+     * configuration spaces, here we only need to do is signaling
+     * to guest an uncorrectable error has occurred.
+     */
+    if (dev->exp.aer_cap) {
+        uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+        uint32_t uncor_status;
+        bool isfatal;
+
+        uncor_status = vfio_pci_read_config(dev,
+                           dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+        /*
+         * if the error is not emitted by this device, we can
+         * just ignore it.
+         */
+        if (!(uncor_status & ~0UL)) {
+            return;
+        }
+
+        isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+        msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+                                 PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+        pcie_aer_msg(dev, &msg);
+    }
+}
+
+static void vfio_register_aer_resume_notifier(VFIOPCIDevice *vdev)
+{
+    int ret;
+    int argsz;
+    struct vfio_irq_set *irq_set;
+    int32_t *pfd;
+
+    if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+        return;
+    }
+
+    if (event_notifier_init(&vdev->resume_notifier, 0)) {
+        error_report("vfio: Unable to init event notifier for"
+                     " resume notification");
+        return;
+    }
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_PCI_RESUME_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *)&irq_set->data;
+
+    *pfd = event_notifier_get_fd(&vdev->resume_notifier);
+    qemu_set_fd_handler(*pfd, vfio_resume_notifier_handler, NULL, vdev);
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
+    if (ret) {
+        error_report("vfio: Failed to set up resume notification");
+        qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+        event_notifier_cleanup(&vdev->resume_notifier);
+    } else {
+        vdev->resume_enabled = true;
+    }
+    g_free(irq_set);
+}
+
+static void vfio_unregister_aer_resume_notifier(VFIOPCIDevice *vdev)
+{
+    int argsz;
+    struct vfio_irq_set *irq_set;
+    int32_t *pfd;
+    int ret;
+
+    if (!vdev->resume_enabled) {
+        return;
+    }
+
+    argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = VFIO_PCI_RESUME_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = 1;
+    pfd = (int32_t *)&irq_set->data;
+    *pfd = -1;
+
+    ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_SET_IRQS, irq_set);
+    if (ret) {
+        error_report("vfio: Failed to de-assign error fd: %m");
+    }
+    g_free(irq_set);
+    qemu_set_fd_handler(event_notifier_get_fd(&vdev->resume_notifier),
+                        NULL, NULL, vdev);
+    event_notifier_cleanup(&vdev->resume_notifier);
+
+    vdev->resume_enabled = false;
+}
+
 static void vfio_req_notifier_handler(void *opaque)
 {
     VFIOPCIDevice *vdev = opaque;
@@ -3062,6 +3149,7 @@ static int vfio_initfn(PCIDevice *pdev)
     }
 
     vfio_register_err_notifier(vdev);
+    vfio_register_aer_resume_notifier(vdev);
     vfio_register_req_notifier(vdev);
     vfio_setup_resetfn_quirk(vdev);
 
@@ -3092,6 +3180,7 @@ static void vfio_exitfn(PCIDevice *pdev)
     VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
 
     vfio_unregister_req_notifier(vdev);
+    vfio_unregister_aer_resume_notifier(vdev);
     vfio_unregister_err_notifier(vdev);
     pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
     vfio_disable_interrupts(vdev);
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 9fb0206..3ebc58f 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -119,6 +119,7 @@ typedef struct VFIOPCIDevice {
     VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
+    EventNotifier resume_notifier;
     EventNotifier req_notifier;
     int (*resetfn)(struct VFIOPCIDevice *);
     uint32_t vendor_id;
@@ -144,6 +145,7 @@ typedef struct VFIOPCIDevice {
     bool no_kvm_msi;
     bool no_kvm_msix;
     bool single_depend_dev;
+    bool resume_enabled;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 15e096c..6d1826d 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -345,6 +345,7 @@ enum {
        VFIO_PCI_MSIX_IRQ_INDEX,
        VFIO_PCI_ERR_IRQ_INDEX,
        VFIO_PCI_REQ_IRQ_INDEX,
+       VFIO_PCI_RESUME_IRQ_INDEX,
        VFIO_PCI_NUM_IRQS
 };
 
-- 
1.9.3






reply via email to

[Prev in Thread] Current Thread [Next in Thread]