qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 2/5] acpi_piix4: Fix PCI hotplug race


From: Marcelo Tosatti
Subject: Re: [Qemu-devel] [PATCH v2 2/5] acpi_piix4: Fix PCI hotplug race
Date: Sun, 8 Apr 2012 01:57:26 -0300
User-agent: Mutt/1.5.21 (2010-09-15)

On Thu, Apr 05, 2012 at 11:07:15AM -0600, Alex Williamson wrote:
> As Michael Tsirkin demonstrated, current PCI hotplug is vulnerable
> to a few races.  The first is a race with other hotplug operations
> because we clear the up & down registers at each event.  If a new
> event comes before the last is processed, up/down is cleared and
> the event is lost.
> 
> To fix this for the down register, we create a life cycle for
> the event request that starts with the hot unplug request in
> piix4_device_hotplug() and ends when the device is ejected.
> This allows us to mask and clear individual bits, preserving them
> against races.  For the up register, we have no clear end point
> for when the event is finished.  We could modify the BIOS to
> acknowledge the bit and clear it, but this creates BIOS compatibiliy
> issues without offering a complete solution.  Instead we note that
> gratuitous ACPI device checks are not harmful, which allows us to
> issue a device check for every slot.  We know which slots are present
> and we know which slots are hotpluggable, so we can easily reduce
> this to a more manageable set for the guest.
> 
> The other race Michael noted was that an unplug request followed
> by reset may also lose the eject notification, which may also
> result in the eject request being lost which a subsequent add
> or remove.  Once we're in reset, the device is unused and we can
> flush the queue of device removals ourselves.  Previously if a
> device_del was issued to a guest without ACPI PCI hotplug support,
> it was necessary to shutdown the guest to recover the device.
> With this, a guest reboot is sufficient.
> 
> Signed-off-by: Alex Williamson <address@hidden>
> ---
> 
>  hw/acpi_piix4.c |   74 
> +++++++++++++++++++++++++++++++++++++++----------------
>  1 files changed, 53 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
> index 5e8b261..0e7af51 100644
> --- a/hw/acpi_piix4.c
> +++ b/hw/acpi_piix4.c
> @@ -48,7 +48,7 @@
>  #define PIIX4_PCI_HOTPLUG_STATUS 2
>  
>  struct pci_status {
> -    uint32_t up;
> +    uint32_t up; /* deprecated, maintained for migration compatibility */
>      uint32_t down;
>  };
>  
> @@ -70,6 +70,7 @@ typedef struct PIIX4PMState {
>      /* for pci hotplug */
>      struct pci_status pci0_status;
>      uint32_t pci0_hotplug_enable;
> +    uint32_t pci0_slot_device_present;

Why can't you use the "up" field for this? Fail to see the point
of the new variable.


>  } PIIX4PMState;
>  
>  static void piix4_acpi_system_hot_add_init(PCIBus *bus, PIIX4PMState *s);
> @@ -205,6 +206,17 @@ static void pm_write_config(PCIDevice *d,
>          pm_io_space_update((PIIX4PMState *)d);
>  }
>  
> +static void vmstate_pci_status_pre_save(void *opaque)
> +{
> +    struct pci_status *pci0_status = opaque;
> +    PIIX4PMState *s = container_of(pci0_status, PIIX4PMState, pci0_status);
> +
> +    /* We no longer track up, so build a safe value for migrating
> +     * to a version that still does... of course these might get lost
> +     * by an old buggy implementation, but we try. */
> +    pci0_status->up = s->pci0_slot_device_present & s->pci0_hotplug_enable;
> +}
> +
>  static int vmstate_acpi_post_load(void *opaque, int version_id)
>  {
>      PIIX4PMState *s = opaque;
> @@ -241,6 +253,7 @@ static const VMStateDescription vmstate_pci_status = {
>      .version_id = 1,
>      .minimum_version_id = 1,
>      .minimum_version_id_old = 1,
> +    .pre_save = vmstate_pci_status_pre_save,
>      .fields      = (VMStateField []) {
>          VMSTATE_UINT32(up, struct pci_status),
>          VMSTATE_UINT32(down, struct pci_status),
> @@ -269,13 +282,38 @@ static const VMStateDescription vmstate_acpi = {
>      }
>  };
>  
> +static void acpi_piix_eject_slot(PIIX4PMState *s, unsigned slots)
> +{
> +    DeviceState *qdev, *next;
> +    BusState *bus = qdev_get_parent_bus(&s->dev.qdev);
> +    int slot = ffs(slots) - 1;
> +
> +    /* Mark request as complete */
> +    s->pci0_status.down &= ~(1U << slot);
> +
> +    QTAILQ_FOREACH_SAFE(qdev, &bus->children, sibling, next) {
> +        PCIDevice *dev = PCI_DEVICE(qdev);
> +        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> +        if (PCI_SLOT(dev->devfn) == slot && !pc->no_hotplug) {
> +            s->pci0_slot_device_present &= ~(1U << slot);
> +            qdev_free(qdev);
> +        }
> +    }
> +}
> +
>  static void piix4_update_hotplug(PIIX4PMState *s)
>  {
>      PCIDevice *dev = &s->dev;
>      BusState *bus = qdev_get_parent_bus(&dev->qdev);
>      DeviceState *qdev, *next;
>  
> +    /* Execute any pending removes during reset */
> +    while (s->pci0_status.down) {
> +        acpi_piix_eject_slot(s, s->pci0_status.down);
> +    }
> +
>      s->pci0_hotplug_enable = ~0;
> +    s->pci0_slot_device_present = 0;
>  
>      QTAILQ_FOREACH_SAFE(qdev, &bus->children, sibling, next) {
>          PCIDevice *pdev = PCI_DEVICE(qdev);
> @@ -283,8 +321,10 @@ static void piix4_update_hotplug(PIIX4PMState *s)
>          int slot = PCI_SLOT(pdev->devfn);
>  
>          if (pc->no_hotplug) {
> -            s->pci0_hotplug_enable &= ~(1 << slot);
> +            s->pci0_hotplug_enable &= ~(1U << slot);
>          }
> +
> +        s->pci0_slot_device_present |= (1U << slot);
>      }
>  }
>  
> @@ -452,7 +492,11 @@ static void gpe_writeb(void *opaque, uint32_t addr, 
> uint32_t val)
>  static uint32_t pci_up_read(void *opaque, uint32_t addr)
>  {
>      PIIX4PMState *s = opaque;
> -    uint32_t val = s->pci0_status.up;
> +    uint32_t val;
> +
> +    /* Manufacture an "up" value to cause a device check on any hotplug
> +     * slot with a device.  Extra device checks are harmless. */
> +    val = s->pci0_slot_device_present & s->pci0_hotplug_enable;
>  
>      PIIX4_DPRINTF("pci_up_read %x\n", val);
>      return val;
> @@ -475,18 +519,7 @@ static uint32_t pciej_read(void *opaque, uint32_t addr)
>  
>  static void pciej_write(void *opaque, uint32_t addr, uint32_t val)
>  {
> -    BusState *bus = opaque;
> -    DeviceState *qdev, *next;
> -    int slot = ffs(val) - 1;
> -
> -    QTAILQ_FOREACH_SAFE(qdev, &bus->children, sibling, next) {
> -        PCIDevice *dev = PCI_DEVICE(qdev);
> -        PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(dev);
> -        if (PCI_SLOT(dev->devfn) == slot && !pc->no_hotplug) {
> -            qdev_free(qdev);
> -        }
> -    }
> -
> +    acpi_piix_eject_slot(opaque, val);
>  
>      PIIX4_DPRINTF("pciej write %x <== %d\n", addr, val);
>  }
> @@ -516,8 +549,8 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, 
> PIIX4PMState *s)
>      register_ioport_read(PCI_UP_BASE, 4, 4, pci_up_read, s);
>      register_ioport_read(PCI_DOWN_BASE, 4, 4, pci_down_read, s);
>  
> -    register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, bus);
> -    register_ioport_read(PCI_EJ_BASE, 4, 4,  pciej_read, bus);
> +    register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, s);
> +    register_ioport_read(PCI_EJ_BASE, 4, 4,  pciej_read, s);
>  
>      register_ioport_write(PCI_RMV_BASE, 4, 4, pcirmv_write, s);
>      register_ioport_read(PCI_RMV_BASE, 4, 4,  pcirmv_read, s);
> @@ -528,13 +561,13 @@ static void piix4_acpi_system_hot_add_init(PCIBus *bus, 
> PIIX4PMState *s)
>  static void enable_device(PIIX4PMState *s, int slot)
>  {
>      s->ar.gpe.sts[0] |= PIIX4_PCI_HOTPLUG_STATUS;
> -    s->pci0_status.up |= (1 << slot);
> +    s->pci0_slot_device_present |= (1U << slot);
>  }
>  
>  static void disable_device(PIIX4PMState *s, int slot)
>  {
>      s->ar.gpe.sts[0] |= PIIX4_PCI_HOTPLUG_STATUS;
> -    s->pci0_status.down |= (1 << slot);
> +    s->pci0_status.down |= (1U << slot);
>  }
>  
>  static int piix4_device_hotplug(DeviceState *qdev, PCIDevice *dev,
> @@ -548,11 +581,10 @@ static int piix4_device_hotplug(DeviceState *qdev, 
> PCIDevice *dev,
>       * it is present on boot, no hotplug event is necessary. We do send an
>       * event when the device is disabled later. */
>      if (state == PCI_COLDPLUG_ENABLED) {
> +        s->pci0_slot_device_present |= (1U << slot);

Why is this necessary?





reply via email to

[Prev in Thread] Current Thread [Next in Thread]