qemu-devel


From: Michael S. Tsirkin
Subject: [Qemu-devel] Re: [PATCH 2/3] virtio-pci: Use ioeventfd for virtqueue notify
Date: Thu, 11 Nov 2010 17:53:17 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

On Thu, Nov 11, 2010 at 01:47:21PM +0000, Stefan Hajnoczi wrote:
> Virtqueue notify is currently handled synchronously in userspace virtio.  This
> prevents the vcpu from executing guest code while hardware emulation code
> handles the notify.
> 
> On systems that support KVM, the ioeventfd mechanism can be used to make
> virtqueue notify a lightweight exit by deferring hardware emulation to the
> iothread and allowing the VM to continue execution.  This model is similar to
> how vhost receives virtqueue notifies.
> 
> The result of this change is improved performance for userspace virtio devices.
> Virtio-blk throughput increases especially for multithreaded scenarios and
> virtio-net transmit throughput increases substantially.
> 
> Some virtio devices are known to have guest drivers which expect a notify to be
> processed synchronously and spin waiting for completion.  Only enable ioeventfd
> for virtio-blk and virtio-net for now.
> 
> Care must be taken not to interfere with vhost-net, which already uses
> ioeventfd host notifiers.  The following list shows the behavior implemented in
> this patch and is designed to take vhost-net into account:
> 
>  * VIRTIO_CONFIG_S_DRIVER_OK -> assign host notifiers,
>    qemu_set_fd_handler(virtio_pci_host_notifier_read)

Shouldn't we also deassign when VIRTIO_CONFIG_S_DRIVER_OK is cleared by an
I/O write to the status register, or when the bus master bit is cleared?

>  * reset -> qemu_set_fd_handler(NULL), deassign host notifiers
>  * virtio_pci_set_host_notifier(true) -> qemu_set_fd_handler(NULL)
>  * virtio_pci_set_host_notifier(false) ->
>    qemu_set_fd_handler(virtio_pci_host_notifier_read)
> 
> Signed-off-by: Stefan Hajnoczi <address@hidden>
> ---
>  hw/virtio-pci.c |  155 +++++++++++++++++++++++++++++++++++++++++++-----------
>  hw/virtio.c     |    5 ++
>  hw/virtio.h     |    1 +
>  3 files changed, 129 insertions(+), 32 deletions(-)
> 
> diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
> index 549118d..436fc59 100644
> --- a/hw/virtio-pci.c
> +++ b/hw/virtio-pci.c
> @@ -83,6 +83,10 @@
>  /* Flags track per-device state like workarounds for quirks in older guests. */
>  #define VIRTIO_PCI_FLAG_BUS_MASTER_BUG  (1 << 0)
>  
> +/* Performance improves when virtqueue kick processing is decoupled from the
> + * vcpu thread using ioeventfd for some devices. */
> +#define VIRTIO_PCI_FLAG_USE_IOEVENTFD   (1 << 1)
> +
>  /* QEMU doesn't strictly need write barriers since everything runs in
>   * lock-step.  We'll leave the calls to wmb() in though to make it obvious for
>   * KVM or if kqemu gets SMP support.
> @@ -179,12 +183,108 @@ static int virtio_pci_load_queue(void * opaque, int n, QEMUFile *f)
>      return 0;
>  }
>  
> +static int virtio_pci_set_host_notifier_ioeventfd(VirtIOPCIProxy *proxy, int n, bool assign)
> +{
> +    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> +    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> +    int r;
> +    if (assign) {
> +        r = event_notifier_init(notifier, 1);
> +        if (r < 0) {
> +            return r;
> +        }
> +        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> +                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> +                                       n, assign);
> +        if (r < 0) {
> +            event_notifier_cleanup(notifier);
> +        }
> +    } else {
> +        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> +                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> +                                       n, assign);
> +        if (r < 0) {
> +            return r;
> +        }
> +        event_notifier_cleanup(notifier);
> +    }
> +    return r;
> +}
> +
> +static void virtio_pci_host_notifier_read(void *opaque)
> +{
> +    VirtQueue *vq = opaque;
> +    EventNotifier *n = virtio_queue_get_host_notifier(vq);
> +    if (event_notifier_test_and_clear(n)) {
> +        virtio_queue_notify_vq(vq);
> +    }
> +}
> +
> +static void virtio_pci_set_host_notifier_fd_handler(VirtIOPCIProxy *proxy, int n, bool assign)
> +{
> +    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> +    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> +    if (assign) {
> +        qemu_set_fd_handler(event_notifier_get_fd(notifier),
> +                            virtio_pci_host_notifier_read, NULL, vq);
> +    } else {
> +        qemu_set_fd_handler(event_notifier_get_fd(notifier),
> +                            NULL, NULL, NULL);
> +    }
> +}
> +
> +static int virtio_pci_set_host_notifiers(VirtIOPCIProxy *proxy, bool assign)
> +{
> +    int n, r;
> +
> +    for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
> +        if (!virtio_queue_get_num(proxy->vdev, n)) {
> +            continue;
> +        }
> +
> +        if (assign) {
> +            r = virtio_pci_set_host_notifier_ioeventfd(proxy, n, true);
> +            if (r < 0) {
> +                goto assign_error;
> +            }
> +
> +            virtio_pci_set_host_notifier_fd_handler(proxy, n, true);
> +        } else {
> +            virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
> +            virtio_pci_set_host_notifier_ioeventfd(proxy, n, false);
> +        }
> +    }
> +    return 0;
> +
> +assign_error:
> +    proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
> +    while (--n >= 0) {
> +        virtio_pci_set_host_notifier_fd_handler(proxy, n, false);
> +        virtio_pci_set_host_notifier_ioeventfd(proxy, n, false);
> +    }
> +    return r;
> +}
> +
> +static void virtio_pci_reset_vdev(VirtIOPCIProxy *proxy)
> +{
> +    /* Poke virtio device so it deassigns its host notifiers (if any) */
> +    virtio_set_status(proxy->vdev, 0);

Hmm. virtio_reset already sets status to 0.
I guess it should just be fixed to call virtio_set_status?
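Something along these lines in hw/virtio.c, assuming virtio_reset still
assigns vdev->status = 0 directly (untested sketch, rest of the function
unchanged):

    void virtio_reset(void *opaque)
    {
        VirtIODevice *vdev = opaque;

        /* Go through virtio_set_status() so the device's set_status
         * callback runs and can deassign its host notifiers (e.g. vhost)
         * before the rest of the state is cleared. */
        virtio_set_status(vdev, 0);

        if (vdev->reset) {
            vdev->reset(vdev);
        }

        /* ... existing clearing of features, queues, etc. ... */
    }

Then virtio_pci_reset_vdev would not need the explicit poke above.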

> +
> +    /* Now safely deassign our own host notifiers */
> +    if (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) {
> +        virtio_pci_set_host_notifiers(proxy, false);
> +    }
> +
> +    virtio_reset(proxy->vdev);
> +    msix_unuse_all_vectors(&proxy->pci_dev);
> +}
> +
>  static void virtio_pci_reset(DeviceState *d)
>  {
>      VirtIOPCIProxy *proxy = container_of(d, VirtIOPCIProxy, pci_dev.qdev);
> -    virtio_reset(proxy->vdev);
> +    virtio_pci_reset_vdev(proxy);
>      msix_reset(&proxy->pci_dev);
> -    proxy->flags = 0;
> +    proxy->flags &= ~VIRTIO_PCI_FLAG_BUS_MASTER_BUG;
>  }
>  
>  static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
> @@ -209,11 +309,10 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
>      case VIRTIO_PCI_QUEUE_PFN:
>          pa = (target_phys_addr_t)val << VIRTIO_PCI_QUEUE_ADDR_SHIFT;
>          if (pa == 0) {
> -            virtio_reset(proxy->vdev);
> -            msix_unuse_all_vectors(&proxy->pci_dev);
> -        }
> -        else
> +            virtio_pci_reset_vdev(proxy);
> +        } else {
>              virtio_queue_set_addr(vdev, vdev->queue_sel, pa);
> +        }
>          break;
>      case VIRTIO_PCI_QUEUE_SEL:
>          if (val < VIRTIO_PCI_QUEUE_MAX)
> @@ -223,10 +322,16 @@ static void virtio_ioport_write(void *opaque, uint32_t addr, uint32_t val)
>          virtio_queue_notify(vdev, val);
>          break;
>      case VIRTIO_PCI_STATUS:
> -        virtio_set_status(vdev, val & 0xFF);
> -        if (vdev->status == 0) {
> -            virtio_reset(proxy->vdev);
> -            msix_unuse_all_vectors(&proxy->pci_dev);
> +        if ((val & VIRTIO_CONFIG_S_DRIVER_OK) &&
> +            !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
> +            (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD)) {
> +            virtio_pci_set_host_notifiers(proxy, true);
> +        }

So we assign the host notifiers here, but deassign them only on reset?  This
seems strange.  Shouldn't we disable the notifiers when the driver clears the
OK status?  How about on bus master disable?
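E.g. something symmetric in the VIRTIO_PCI_STATUS write path, rough sketch
only (and similarly from the config write handler when bus master goes away):

    if (!(val & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD)) {
        /* Driver is clearing DRIVER_OK: stop taking kicks via ioeventfd
         * and fall back to the synchronous notify path. */
        virtio_pci_set_host_notifiers(proxy, false);
    }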

> +
> +        if (val & 0xFF) {
> +            virtio_set_status(vdev, val & 0xFF);
> +        } else {
> +            virtio_pci_reset_vdev(proxy);
>          }
>  
>          /* Linux before 2.6.34 sets the device as OK without enabling
> @@ -480,30 +585,12 @@ assign_error:
>  static int virtio_pci_set_host_notifier(void *opaque, int n, bool assign)
>  {
>      VirtIOPCIProxy *proxy = opaque;
> -    VirtQueue *vq = virtio_get_queue(proxy->vdev, n);
> -    EventNotifier *notifier = virtio_queue_get_host_notifier(vq);
> -    int r;
> -    if (assign) {
> -        r = event_notifier_init(notifier, 1);
> -        if (r < 0) {
> -            return r;
> -        }
> -        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> -                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> -                                       n, assign);
> -        if (r < 0) {
> -            event_notifier_cleanup(notifier);
> -        }
> +    if (proxy->flags & VIRTIO_PCI_FLAG_USE_IOEVENTFD) {
> +        virtio_pci_set_host_notifier_fd_handler(proxy, n, !assign);
> +        return 0;
>      } else {
> -        r = kvm_set_ioeventfd_pio_word(event_notifier_get_fd(notifier),
> -                                       proxy->addr + VIRTIO_PCI_QUEUE_NOTIFY,
> -                                       n, assign);
> -        if (r < 0) {
> -            return r;
> -        }
> -        event_notifier_cleanup(notifier);
> +        return virtio_pci_set_host_notifier_ioeventfd(proxy, n, assign);
>      }
> -    return r;
>  }
>  
>  static const VirtIOBindings virtio_pci_bindings = {
> @@ -702,6 +789,8 @@ static PCIDeviceInfo virtio_info[] = {
>          .qdev.props = (Property[]) {
>              DEFINE_PROP_HEX32("class", VirtIOPCIProxy, class_code, 0),
>              DEFINE_BLOCK_PROPERTIES(VirtIOPCIProxy, block),
> +            DEFINE_PROP_UINT32("flags", VirtIOPCIProxy, flags,
> +                               VIRTIO_PCI_FLAG_USE_IOEVENTFD),
>              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2),
>              DEFINE_VIRTIO_BLK_FEATURES(VirtIOPCIProxy, host_features),
>              DEFINE_PROP_END_OF_LIST(),
> @@ -714,6 +803,8 @@ static PCIDeviceInfo virtio_info[] = {
>          .exit       = virtio_net_exit_pci,
>          .romfile    = "pxe-virtio.bin",
>          .qdev.props = (Property[]) {
> +            DEFINE_PROP_UINT32("flags", VirtIOPCIProxy, flags,
> +                               VIRTIO_PCI_FLAG_USE_IOEVENTFD),
>              DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 3),
>              DEFINE_VIRTIO_NET_FEATURES(VirtIOPCIProxy, host_features),
>              DEFINE_NIC_PROPERTIES(VirtIOPCIProxy, nic),

This ties the interface to an internal macro value.  Further, the user gets to
tweak other bits in this integer, which we don't want.  Finally, the interface
is extremely unfriendly.  Please use a bit property instead: DEFINE_PROP_BIT.
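E.g. (names are only a suggestion; a separate bit-number define keeps the
flag value and the property in sync):

    #define VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT  1
    #define VIRTIO_PCI_FLAG_USE_IOEVENTFD \
        (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)

    /* in the qdev.props array, instead of the raw uint32 "flags" property */
    DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags,
                    VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true),

That gives the user a simple ioeventfd=on|off switch without exposing the
rest of the flags word.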

> diff --git a/hw/virtio.c b/hw/virtio.c
> index a2a657e..f588e29 100644
> --- a/hw/virtio.c
> +++ b/hw/virtio.c
> @@ -582,6 +582,11 @@ void virtio_queue_notify(VirtIODevice *vdev, int n)
>      }
>  }
>  
> +void virtio_queue_notify_vq(VirtQueue *vq)
> +{
> +    virtio_queue_notify(vq->vdev, vq - vq->vdev->vq);

Let's implement virtio_queue_notify in terms of virtio_queue_notify_vq.
Not the other way around.
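I.e. roughly, keeping whatever checks virtio_queue_notify does today
(sketch only):

    void virtio_queue_notify_vq(VirtQueue *vq)
    {
        if (vq->vring.desc) {
            vq->handle_output(vq->vdev, vq);
        }
    }

    void virtio_queue_notify(VirtIODevice *vdev, int n)
    {
        if (n < VIRTIO_PCI_QUEUE_MAX) {
            virtio_queue_notify_vq(&vdev->vq[n]);
        }
    }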

> +}
> +
>  uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
>  {
>      return n < VIRTIO_PCI_QUEUE_MAX ? vdev->vq[n].vector :
> diff --git a/hw/virtio.h b/hw/virtio.h
> index 02fa312..5ae521c 100644
> --- a/hw/virtio.h
> +++ b/hw/virtio.h
> @@ -219,5 +219,6 @@ void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx);
>  VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n);
>  EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq);
>  EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq);
> +void virtio_queue_notify_vq(VirtQueue *vq);
>  void virtio_irq(VirtQueue *vq);
>  #endif
> -- 
> 1.7.2.3


