qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH V8 29/39] vfio-pci: cpr part 3 (intx)


From: Alex Williamson
Subject: Re: [PATCH V8 29/39] vfio-pci: cpr part 3 (intx)
Date: Wed, 29 Jun 2022 14:43:16 -0600

On Wed, 15 Jun 2022 07:52:16 -0700
Steve Sistare <steven.sistare@oracle.com> wrote:

> Preserve vfio INTX state across cpr restart.  Preserve VFIOINTx fields as
> follows:
>   pin : Recover this from the vfio config in kernel space
>   interrupt : Preserve its eventfd descriptor across exec.
>   unmask : Ditto
>   route.irq : This could perhaps be recovered in vfio_pci_post_load by
>     calling pci_device_route_intx_to_irq(pin), whose implementation reads
>     config space for a bridge device such as ich9.  However, there is no
>     guarantee that the bridge vmstate is read before vfio vmstate.  Rather
>     than fiddling with MigrationPriority for vmstate handlers, explicitly
>     save route.irq in vfio vmstate.
>   pending : save in vfio vmstate.
>   mmap_timeout, mmap_timer : Re-initialize
>   bool kvm_accel : Re-initialize
> 
> In vfio_realize, defer calling vfio_intx_enable until the vmstate
> is available, in vfio_pci_post_load.  Modify vfio_intx_enable and
> vfio_intx_kvm_enable to skip vfio initialization, but still perform
> kvm initialization.
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  hw/vfio/pci.c | 92 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 83 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 2fd7121..b8aee91 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -173,14 +173,45 @@ static void vfio_intx_eoi(VFIODevice *vbasedev)
>      vfio_unmask_single_irqindex(vbasedev, VFIO_PCI_INTX_IRQ_INDEX);
>  }
>  
> +#ifdef CONFIG_KVM
> +static bool vfio_no_kvm_intx(VFIOPCIDevice *vdev)
> +{
> +    return vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
> +           vdev->intx.route.mode != PCI_INTX_ENABLED ||
> +           !kvm_resamplefds_enabled();
> +}
> +#endif
> +
> +static void vfio_intx_reenable_kvm(VFIOPCIDevice *vdev, Error **errp)
> +{
> +#ifdef CONFIG_KVM
> +    if (vfio_no_kvm_intx(vdev)) {
> +        return;
> +    }
> +
> +    if (vfio_notifier_init(vdev, &vdev->intx.unmask, "intx-unmask", 0)) {
> +        error_setg(errp, "vfio_notifier_init intx-unmask failed");
> +        return;
> +    }
> +
> +    if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
> +                                           &vdev->intx.interrupt,
> +                                           &vdev->intx.unmask,
> +                                           vdev->intx.route.irq)) {
> +        error_setg_errno(errp, errno, "failed to setup resample irqfd");


Does not unwind with vfio_notifier_cleanup().  This also exactly
duplicates code in vfio_intx_enable_kvm(), which suggests it needs
further refactoring to a common helper.



> +        return;
> +    }
> +
> +    vdev->intx.kvm_accel = true;
> +#endif
> +}
> +
>  static void vfio_intx_enable_kvm(VFIOPCIDevice *vdev, Error **errp)
>  {
>  #ifdef CONFIG_KVM
>      int irq_fd = event_notifier_get_fd(&vdev->intx.interrupt);
>  
> -    if (vdev->no_kvm_intx || !kvm_irqfds_enabled() ||
> -        vdev->intx.route.mode != PCI_INTX_ENABLED ||
> -        !kvm_resamplefds_enabled()) {
> +    if (vfio_no_kvm_intx(vdev)) {
>          return;
>      }
>  
> @@ -328,7 +359,13 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error 
> **errp)
>          return 0;
>      }
>  
> -    vfio_disable_interrupts(vdev);
> +    /*
> +     * Do not alter interrupt state during vfio_realize and cpr-load.  The
> +     * reused flag is cleared thereafter.
> +     */
> +    if (!vdev->vbasedev.reused) {
> +        vfio_disable_interrupts(vdev);
> +    }
>  
>      vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
>      pci_config_set_interrupt_pin(vdev->pdev.config, pin);
> @@ -353,6 +390,11 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error 
> **errp)
>      fd = event_notifier_get_fd(&vdev->intx.interrupt);
>      qemu_set_fd_handler(fd, vfio_intx_interrupt, NULL, vdev);
>  
> +    if (vdev->vbasedev.reused) {
> +        vfio_intx_reenable_kvm(vdev, &err);
> +        goto finish;
> +    }
> +

This only jumps over the vfio_set_irq_signaling() and
vfio_intx_enable_kvm(), largely replacing the latter with chunks of
code taken from it.  Doesn't seem like the right factoring.

>      if (vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_INTX_IRQ_INDEX, 0,
>                                 VFIO_IRQ_SET_ACTION_TRIGGER, fd, errp)) {
>          qemu_set_fd_handler(fd, NULL, NULL, vdev);
> @@ -365,6 +407,7 @@ static int vfio_intx_enable(VFIOPCIDevice *vdev, Error 
> **errp)
>          warn_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
>      }
>  
> +finish:
>      vdev->interrupt = VFIO_INT_INTx;
>  
>      trace_vfio_intx_enable(vdev->vbasedev.name);
> @@ -3195,9 +3238,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>                                               vfio_intx_routing_notifier);
>          vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
>          kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
> -        ret = vfio_intx_enable(vdev, errp);
> -        if (ret) {
> -            goto out_deregister;
> +
> +        /* Wait until cpr-load reads intx routing data to enable */
> +        if (!vdev->vbasedev.reused) {
> +            ret = vfio_intx_enable(vdev, errp);
> +            if (ret) {
> +                goto out_deregister;
> +            }
>          }
>      }
>  
> @@ -3474,6 +3521,7 @@ static int vfio_pci_post_load(void *opaque, int 
> version_id)
>      VFIOPCIDevice *vdev = opaque;
>      PCIDevice *pdev = &vdev->pdev;
>      int nr_vectors;
> +    int ret = 0;
>  
>      if (msix_enabled(pdev)) {
>          msix_set_vector_notifiers(pdev, vfio_msix_vector_use,
> @@ -3486,10 +3534,35 @@ static int vfio_pci_post_load(void *opaque, int 
> version_id)
>          vfio_claim_vectors(vdev, nr_vectors, false);
>  
>      } else if (vfio_pci_read_config(pdev, PCI_INTERRUPT_PIN, 1)) {
> -        assert(0);      /* completed in a subsequent patch */
> +        Error *err = 0;
> +        ret = vfio_intx_enable(vdev, &err);
> +        if (ret) {
> +            error_report_err(err);
> +        }
>      }
>  
> -    return 0;
> +    return ret;
> +}
> +
> +static const VMStateDescription vfio_intx_vmstate = {
> +    .name = "vfio-intx",
> +    .unmigratable = 1,


unmigratable-vmstates-to-interfere-with-migration++

Thanks,
Alex


> +    .version_id = 0,
> +    .minimum_version_id = 0,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BOOL(pending, VFIOINTx),
> +        VMSTATE_UINT32(route.mode, VFIOINTx),
> +        VMSTATE_INT32(route.irq, VFIOINTx),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +#define VMSTATE_VFIO_INTX(_field, _state) {                         \
> +    .name       = (stringify(_field)),                              \
> +    .size       = sizeof(VFIOINTx),                                 \
> +    .vmsd       = &vfio_intx_vmstate,                               \
> +    .flags      = VMS_STRUCT,                                       \
> +    .offset     = vmstate_offset_value(_state, _field, VFIOINTx),   \
>  }
>  
>  static bool vfio_pci_needed(void *opaque)
> @@ -3509,6 +3582,7 @@ static const VMStateDescription vfio_pci_vmstate = {
>      .fields = (VMStateField[]) {
>          VMSTATE_PCI_DEVICE(pdev, VFIOPCIDevice),
>          VMSTATE_MSIX_TEST(pdev, VFIOPCIDevice, vfio_msix_present),
> +        VMSTATE_VFIO_INTX(intx, VFIOPCIDevice),
>          VMSTATE_END_OF_LIST()
>      }
>  };




reply via email to

[Prev in Thread] Current Thread [Next in Thread]