[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [RFC V2 PATCH 4/4] virtio-net: add multiqueue support
From: |
Michael S. Tsirkin |
Subject: |
Re: [Qemu-devel] [RFC V2 PATCH 4/4] virtio-net: add multiqueue support |
Date: |
Sun, 1 Jul 2012 12:43:21 +0300 |
On Mon, Jun 25, 2012 at 06:04:49PM +0800, Jason Wang wrote:
> This patch lets virtio-net transmit and receive packets through multiple
> VLANClientStates and abstracts them as multiple virtqueues to the guest. A new
> parameter 'queues' was introduced to specify the number of queue pairs.
>
> The main goal for vhost support is to let multiqueue be usable without
> changes in vhost code. So each vhost_net structure is used to track a single
> VLANClientState and two virtqueues, as in the past. As multiple VLANClientStates
> are
> stored in the NICState, we can easily infer the corresponding VLANClientState
> from it and queue_index.
>
> Signed-off-by: Jason Wang <address@hidden>
Can this patch be split up?
1. extend vhost API to allow multiqueue and minimally tweak virtio
2. add real multiqueue for virtio
Hmm?
> ---
> hw/vhost.c | 58 ++++---
> hw/vhost.h | 1
> hw/vhost_net.c | 7 +
> hw/vhost_net.h | 2
> hw/virtio-net.c | 461
> +++++++++++++++++++++++++++++++++++++------------------
> hw/virtio-net.h | 3
> 6 files changed, 355 insertions(+), 177 deletions(-)
>
> diff --git a/hw/vhost.c b/hw/vhost.c
> index 43664e7..6318bb2 100644
> --- a/hw/vhost.c
> +++ b/hw/vhost.c
> @@ -620,11 +620,12 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
> {
> target_phys_addr_t s, l, a;
> int r;
> + int vhost_vq_index = (idx > 2 ? idx - 1 : idx) % dev->nvqs;
> struct vhost_vring_file file = {
> - .index = idx,
> + .index = vhost_vq_index
> };
> struct vhost_vring_state state = {
> - .index = idx,
> + .index = vhost_vq_index
> };
> struct VirtQueue *vvq = virtio_get_queue(vdev, idx);
>
> @@ -670,11 +671,12 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
> goto fail_alloc_ring;
> }
>
> - r = vhost_virtqueue_set_addr(dev, vq, idx, dev->log_enabled);
> + r = vhost_virtqueue_set_addr(dev, vq, vhost_vq_index, dev->log_enabled);
> if (r < 0) {
> r = -errno;
> goto fail_alloc;
> }
> +
> file.fd = event_notifier_get_fd(virtio_queue_get_host_notifier(vvq));
> r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
> if (r) {
> @@ -715,7 +717,7 @@ static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
> unsigned idx)
> {
> struct vhost_vring_state state = {
> - .index = idx,
> + .index = (idx > 2 ? idx - 1 : idx) % dev->nvqs,
> };
> int r;
> r = ioctl(dev->control, VHOST_GET_VRING_BASE, &state);
> @@ -829,7 +831,9 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev,
> VirtIODevice *vdev)
> }
>
> for (i = 0; i < hdev->nvqs; ++i) {
> - r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, true);
> + r = vdev->binding->set_host_notifier(vdev->binding_opaque,
> + hdev->start_idx + i,
> + true);
> if (r < 0) {
> fprintf(stderr, "vhost VQ %d notifier binding failed: %d\n", i,
> -r);
> goto fail_vq;
> @@ -839,7 +843,9 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev,
> VirtIODevice *vdev)
> return 0;
> fail_vq:
> while (--i >= 0) {
> - r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
> + r = vdev->binding->set_host_notifier(vdev->binding_opaque,
> + hdev->start_idx + i,
> + false);
> if (r < 0) {
> fprintf(stderr, "vhost VQ %d notifier cleanup error: %d\n", i,
> -r);
> fflush(stderr);
> @@ -860,7 +866,9 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev,
> VirtIODevice *vdev)
> int i, r;
>
> for (i = 0; i < hdev->nvqs; ++i) {
> - r = vdev->binding->set_host_notifier(vdev->binding_opaque, i, false);
> + r = vdev->binding->set_host_notifier(vdev->binding_opaque,
> + hdev->start_idx + i,
> + false);
> if (r < 0) {
> fprintf(stderr, "vhost VQ %d notifier cleanup failed: %d\n", i,
> -r);
> fflush(stderr);
> @@ -874,15 +882,17 @@ int vhost_dev_start(struct vhost_dev *hdev,
> VirtIODevice *vdev)
> {
> int i, r;
> if (!vdev->binding->set_guest_notifiers) {
> - fprintf(stderr, "binding does not support guest notifiers\n");
> + fprintf(stderr, "binding does not support guest notifier\n");
> r = -ENOSYS;
> goto fail;
> }
>
> - r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
> - if (r < 0) {
> - fprintf(stderr, "Error binding guest notifier: %d\n", -r);
> - goto fail_notifiers;
> + if (hdev->start_idx == 0) {
> + r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
> + if (r < 0) {
> + fprintf(stderr, "Error binding guest notifier: %d\n", -r);
> + goto fail_notifiers;
> + }
> }
>
> r = vhost_dev_set_features(hdev, hdev->log_enabled);
> @@ -898,7 +908,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice
> *vdev)
> r = vhost_virtqueue_init(hdev,
> vdev,
> hdev->vqs + i,
> - i);
> + hdev->start_idx + i);
> if (r < 0) {
> goto fail_vq;
> }
> @@ -925,11 +935,13 @@ fail_vq:
> vhost_virtqueue_cleanup(hdev,
> vdev,
> hdev->vqs + i,
> - i);
> + hdev->start_idx + i);
> }
> + i = hdev->nvqs;
> fail_mem:
> fail_features:
> - vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
> + if (hdev->start_idx == 0)
> + vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
> fail_notifiers:
> fail:
> return r;
> @@ -944,18 +956,22 @@ void vhost_dev_stop(struct vhost_dev *hdev,
> VirtIODevice *vdev)
> vhost_virtqueue_cleanup(hdev,
> vdev,
> hdev->vqs + i,
> - i);
> + hdev->start_idx + i);
> }
> +
> for (i = 0; i < hdev->n_mem_sections; ++i) {
> vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
> 0, (target_phys_addr_t)~0x0ull);
> }
> - r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
> - if (r < 0) {
> - fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
> - fflush(stderr);
> +
> + if (hdev->start_idx == 0) {
> + r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
> + if (r < 0) {
> + fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
> + fflush(stderr);
> + }
> + assert (r >= 0);
> }
> - assert (r >= 0);
>
> hdev->started = false;
> g_free(hdev->log);
> diff --git a/hw/vhost.h b/hw/vhost.h
> index 80e64df..fa5357a 100644
> --- a/hw/vhost.h
> +++ b/hw/vhost.h
> @@ -34,6 +34,7 @@ struct vhost_dev {
> MemoryRegionSection *mem_sections;
> struct vhost_virtqueue *vqs;
> int nvqs;
> + int start_idx;
> unsigned long long features;
> unsigned long long acked_features;
> unsigned long long backend_features;
> diff --git a/hw/vhost_net.c b/hw/vhost_net.c
> index f672e9d..73a72bb 100644
> --- a/hw/vhost_net.c
> +++ b/hw/vhost_net.c
> @@ -138,13 +138,15 @@ bool vhost_net_query(VHostNetState *net, VirtIODevice
> *dev)
> }
>
> int vhost_net_start(struct vhost_net *net,
> - VirtIODevice *dev)
> + VirtIODevice *dev,
> + int start_idx)
> {
> struct vhost_vring_file file = { };
> int r;
>
> net->dev.nvqs = 2;
> net->dev.vqs = net->vqs;
> + net->dev.start_idx = start_idx;
>
> r = vhost_dev_enable_notifiers(&net->dev, dev);
> if (r < 0) {
> @@ -227,7 +229,8 @@ bool vhost_net_query(VHostNetState *net, VirtIODevice
> *dev)
> }
>
> int vhost_net_start(struct vhost_net *net,
> - VirtIODevice *dev)
> + VirtIODevice *dev,
> + int start_idx)
> {
> return -ENOSYS;
> }
> diff --git a/hw/vhost_net.h b/hw/vhost_net.h
> index 91e40b1..79a4f09 100644
> --- a/hw/vhost_net.h
> +++ b/hw/vhost_net.h
> @@ -9,7 +9,7 @@ typedef struct vhost_net VHostNetState;
> VHostNetState *vhost_net_init(VLANClientState *backend, int devfd, bool
> force);
>
> bool vhost_net_query(VHostNetState *net, VirtIODevice *dev);
> -int vhost_net_start(VHostNetState *net, VirtIODevice *dev);
> +int vhost_net_start(VHostNetState *net, VirtIODevice *dev, int start_idx);
> void vhost_net_stop(VHostNetState *net, VirtIODevice *dev);
>
> void vhost_net_cleanup(VHostNetState *net);
> diff --git a/hw/virtio-net.c b/hw/virtio-net.c
> index 3f190d4..d42c4cc 100644
> --- a/hw/virtio-net.c
> +++ b/hw/virtio-net.c
> @@ -26,34 +26,43 @@
> #define MAC_TABLE_ENTRIES 64
> #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
>
> -typedef struct VirtIONet
> +struct VirtIONet;
> +
> +typedef struct VirtIONetQueue
> {
> - VirtIODevice vdev;
> - uint8_t mac[ETH_ALEN];
> - uint16_t status;
> VirtQueue *rx_vq;
> VirtQueue *tx_vq;
> - VirtQueue *ctrl_vq;
> - NICState *nic;
> QEMUTimer *tx_timer;
> QEMUBH *tx_bh;
> uint32_t tx_timeout;
> - int32_t tx_burst;
> int tx_waiting;
> - uint32_t has_vnet_hdr;
> - uint8_t has_ufo;
> struct {
> VirtQueueElement elem;
> ssize_t len;
> } async_tx;
> + struct VirtIONet *n;
> + uint8_t vhost_started;
> +} VirtIONetQueue;
> +
> +typedef struct VirtIONet
> +{
> + VirtIODevice vdev;
> + uint8_t mac[ETH_ALEN];
> + uint16_t status;
> + VirtIONetQueue vqs[MAX_QUEUE_NUM];
> + VirtQueue *ctrl_vq;
> + NICState *nic;
> + int32_t tx_burst;
> + uint32_t has_vnet_hdr;
> + uint8_t has_ufo;
> int mergeable_rx_bufs;
> + int multiqueue;
> uint8_t promisc;
> uint8_t allmulti;
> uint8_t alluni;
> uint8_t nomulti;
> uint8_t nouni;
> uint8_t nobcast;
> - uint8_t vhost_started;
> struct {
> int in_use;
> int first_multi;
> @@ -63,6 +72,7 @@ typedef struct VirtIONet
> } mac_table;
> uint32_t *vlans;
> DeviceState *qdev;
> + uint32_t queues;
> } VirtIONet;
>
> /* TODO
> @@ -74,12 +84,25 @@ static VirtIONet *to_virtio_net(VirtIODevice *vdev)
> return (VirtIONet *)vdev;
> }
>
> +static int vq_get_pair_index(VirtIONet *n, VirtQueue *vq)
> +{
> + int i;
> + for (i = 0; i < n->queues; i++) {
> + if (n->vqs[i].tx_vq == vq || n->vqs[i].rx_vq == vq) {
> + return i;
> + }
> + }
> + assert(1);
> + return -1;
> +}
> +
> static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
> {
> VirtIONet *n = to_virtio_net(vdev);
> struct virtio_net_config netcfg;
>
> stw_p(&netcfg.status, n->status);
> + netcfg.queues = n->queues * 2;
> memcpy(netcfg.mac, n->mac, ETH_ALEN);
> memcpy(config, &netcfg, sizeof(netcfg));
> }
> @@ -103,78 +126,140 @@ static bool virtio_net_started(VirtIONet *n, uint8_t
> status)
> (n->status & VIRTIO_NET_S_LINK_UP) && n->vdev.vm_running;
> }
>
> -static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
> +static void nc_vhost_status(VLANClientState *nc, VirtIONet *n,
> + uint8_t status)
> {
> - if (!n->nic->nc.peer) {
> + int queue_index = nc->queue_index;
> + VLANClientState *peer = nc->peer;
> + VirtIONetQueue *netq = &n->vqs[nc->queue_index];
> +
> + if (!peer) {
> return;
> }
> - if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
> + if (peer->info->type != NET_CLIENT_TYPE_TAP) {
> return;
> }
>
> - if (!tap_get_vhost_net(n->nic->nc.peer)) {
> + if (!tap_get_vhost_net(peer)) {
> return;
> }
> - if (!!n->vhost_started == virtio_net_started(n, status) &&
> - !n->nic->nc.peer->link_down) {
> + if (!!netq->vhost_started == virtio_net_started(n, status) &&
> + !peer->link_down) {
> return;
> }
> - if (!n->vhost_started) {
> - int r;
> - if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
> + if (!netq->vhost_started) {
> + /* skip ctrl vq */
> + int r, start_idx = queue_index == 0 ? 0 : queue_index * 2 + 1;
> + if (!vhost_net_query(tap_get_vhost_net(peer), &n->vdev)) {
> return;
> }
> - r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
> + r = vhost_net_start(tap_get_vhost_net(peer), &n->vdev, start_idx);
> if (r < 0) {
> error_report("unable to start vhost net: %d: "
> "falling back on userspace virtio", -r);
> } else {
> - n->vhost_started = 1;
> + netq->vhost_started = 1;
> }
> } else {
> - vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
> - n->vhost_started = 0;
> + vhost_net_stop(tap_get_vhost_net(peer), &n->vdev);
> + netq->vhost_started = 0;
> + }
> +}
> +
> +static int peer_attach(VirtIONet *n, int index)
> +{
> + if (!n->nic->ncs[index]->peer) {
> + return -1;
> + }
> +
> + if (n->nic->ncs[index]->peer->info->type != NET_CLIENT_TYPE_TAP) {
> + return -1;
> + }
> +
> + return tap_attach(n->nic->ncs[index]->peer);
> +}
> +
> +static int peer_detach(VirtIONet *n, int index)
> +{
> + if (!n->nic->ncs[index]->peer) {
> + return -1;
> + }
> +
> + if (n->nic->ncs[index]->peer->info->type != NET_CLIENT_TYPE_TAP) {
> + return -1;
> + }
> +
> + return tap_detach(n->nic->ncs[index]->peer);
> +}
> +
> +static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
> +{
> + int i;
> + for (i = 0; i < n->queues; i++) {
> + if (!n->multiqueue && i != 0)
> + status = 0;
> + nc_vhost_status(n->nic->ncs[i], n, status);
> }
> }
>
> static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
> {
> VirtIONet *n = to_virtio_net(vdev);
> + int i;
>
> virtio_net_vhost_status(n, status);
>
> - if (!n->tx_waiting) {
> - return;
> - }
> + for (i = 0; i < n->queues; i++) {
> + VirtIONetQueue *netq = &n->vqs[i];
> + if (!netq->tx_waiting) {
> + continue;
> + }
> +
> + if (!n->multiqueue && i != 0)
> + status = 0;
>
> - if (virtio_net_started(n, status) && !n->vhost_started) {
> - if (n->tx_timer) {
> - qemu_mod_timer(n->tx_timer,
> - qemu_get_clock_ns(vm_clock) + n->tx_timeout);
> + if (virtio_net_started(n, status) && !netq->vhost_started) {
> + if (netq->tx_timer) {
> + qemu_mod_timer(netq->tx_timer,
> + qemu_get_clock_ns(vm_clock) +
> netq->tx_timeout);
> + } else {
> + qemu_bh_schedule(netq->tx_bh);
> + }
> } else {
> - qemu_bh_schedule(n->tx_bh);
> + if (netq->tx_timer) {
> + qemu_del_timer(netq->tx_timer);
> + } else {
> + qemu_bh_cancel(netq->tx_bh);
> + }
> }
> - } else {
> - if (n->tx_timer) {
> - qemu_del_timer(n->tx_timer);
> - } else {
> - qemu_bh_cancel(n->tx_bh);
> + }
> +}
> +
> +static bool virtio_net_is_link_up(VirtIONet *n)
> +{
> + int i;
> + for (i = 0; i < n->queues; i++) {
> + if (n->nic->ncs[i]->link_down) {
> + return false;
> }
> }
> + return true;
> }
>
> static void virtio_net_set_link_status(VLANClientState *nc)
> {
> - VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> + VirtIONet *n = ((NICState *)(nc->opaque))->opaque;
> uint16_t old_status = n->status;
>
> - if (nc->link_down)
> + if (virtio_net_is_link_up(n)) {
> n->status &= ~VIRTIO_NET_S_LINK_UP;
> - else
> + } else {
> n->status |= VIRTIO_NET_S_LINK_UP;
> + }
>
> - if (n->status != old_status)
> + if (n->status != old_status) {
> virtio_notify_config(&n->vdev);
> + }
>
> virtio_net_set_status(&n->vdev, n->vdev.status);
> }
> @@ -202,13 +287,15 @@ static void virtio_net_reset(VirtIODevice *vdev)
>
> static int peer_has_vnet_hdr(VirtIONet *n)
> {
> - if (!n->nic->nc.peer)
> + if (!n->nic->ncs[0]->peer) {
> return 0;
> + }
>
> - if (n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP)
> + if (n->nic->ncs[0]->peer->info->type != NET_CLIENT_TYPE_TAP) {
> return 0;
> + }
>
> - n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
> + n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->ncs[0]->peer);
>
> return n->has_vnet_hdr;
> }
> @@ -218,7 +305,7 @@ static int peer_has_ufo(VirtIONet *n)
> if (!peer_has_vnet_hdr(n))
> return 0;
>
> - n->has_ufo = tap_has_ufo(n->nic->nc.peer);
> + n->has_ufo = tap_has_ufo(n->nic->ncs[0]->peer);
>
> return n->has_ufo;
> }
> @@ -228,9 +315,13 @@ static uint32_t virtio_net_get_features(VirtIODevice
> *vdev, uint32_t features)
> VirtIONet *n = to_virtio_net(vdev);
>
> features |= (1 << VIRTIO_NET_F_MAC);
> + features |= (1 << VIRTIO_NET_F_MULTIQUEUE);
>
> if (peer_has_vnet_hdr(n)) {
> - tap_using_vnet_hdr(n->nic->nc.peer, 1);
> + int i;
> + for (i = 0; i < n->queues; i++) {
> + tap_using_vnet_hdr(n->nic->ncs[i]->peer, 1);
> + }
> } else {
> features &= ~(0x1 << VIRTIO_NET_F_CSUM);
> features &= ~(0x1 << VIRTIO_NET_F_HOST_TSO4);
> @@ -248,14 +339,15 @@ static uint32_t virtio_net_get_features(VirtIODevice
> *vdev, uint32_t features)
> features &= ~(0x1 << VIRTIO_NET_F_HOST_UFO);
> }
>
> - if (!n->nic->nc.peer ||
> - n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
> + if (!n->nic->ncs[0]->peer ||
> + n->nic->ncs[0]->peer->info->type != NET_CLIENT_TYPE_TAP) {
> return features;
> }
> - if (!tap_get_vhost_net(n->nic->nc.peer)) {
> + if (!tap_get_vhost_net(n->nic->ncs[0]->peer)) {
> return features;
> }
> - return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer),
> features);
> + return vhost_net_get_features(tap_get_vhost_net(n->nic->ncs[0]->peer),
> + features);
> }
>
> static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
> @@ -276,25 +368,38 @@ static uint32_t virtio_net_bad_features(VirtIODevice
> *vdev)
> static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
> {
> VirtIONet *n = to_virtio_net(vdev);
> + int i, r;
>
> n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));
> + n->multiqueue = !!(features & (1 << VIRTIO_NET_F_MULTIQUEUE));
>
> - if (n->has_vnet_hdr) {
> - tap_set_offload(n->nic->nc.peer,
> - (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
> - (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
> - (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
> - (features >> VIRTIO_NET_F_GUEST_ECN) & 1,
> - (features >> VIRTIO_NET_F_GUEST_UFO) & 1);
> - }
> - if (!n->nic->nc.peer ||
> - n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
> - return;
> - }
> - if (!tap_get_vhost_net(n->nic->nc.peer)) {
> - return;
> + for (i = 0; i < n->queues; i++) {
> + if (!n->multiqueue && i != 0) {
> + r = peer_detach(n, i);
> + assert(r == 0);
> + } else {
> + r = peer_attach(n, i);
> + assert(r == 0);
> +
> + if (n->has_vnet_hdr) {
> + tap_set_offload(n->nic->ncs[i]->peer,
> + (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
> + (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
> + (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
> + (features >> VIRTIO_NET_F_GUEST_ECN) & 1,
> + (features >> VIRTIO_NET_F_GUEST_UFO) & 1);
> + }
> + if (!n->nic->ncs[i]->peer ||
> + n->nic->ncs[i]->peer->info->type != NET_CLIENT_TYPE_TAP) {
> + continue;
> + }
> + if (!tap_get_vhost_net(n->nic->ncs[i]->peer)) {
> + continue;
> + }
> + vhost_net_ack_features(tap_get_vhost_net(n->nic->ncs[i]->peer),
> + features);
> + }
> }
> - vhost_net_ack_features(tap_get_vhost_net(n->nic->nc.peer), features);
> }
>
> static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
> @@ -446,7 +551,7 @@ static void virtio_net_handle_rx(VirtIODevice *vdev,
> VirtQueue *vq)
> {
> VirtIONet *n = to_virtio_net(vdev);
>
> - qemu_flush_queued_packets(&n->nic->nc);
> + qemu_flush_queued_packets(n->nic->ncs[vq_get_pair_index(n, vq)]);
>
> /* We now have RX buffers, signal to the IO thread to break out of the
> * select to re-poll the tap file descriptor */
> @@ -455,36 +560,37 @@ static void virtio_net_handle_rx(VirtIODevice *vdev,
> VirtQueue *vq)
>
> static int virtio_net_can_receive(VLANClientState *nc)
> {
> - VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> + int queue_index = nc->queue_index;
> + VirtIONet *n = ((NICState *)nc->opaque)->opaque;
> +
> if (!n->vdev.vm_running) {
> return 0;
> }
>
> - if (!virtio_queue_ready(n->rx_vq) ||
> + if (!virtio_queue_ready(n->vqs[queue_index].rx_vq) ||
> !(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> return 0;
>
> return 1;
> }
>
> -static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
> +static int virtio_net_has_buffers(VirtIONet *n, int bufsize, VirtQueue *vq)
> {
> - if (virtio_queue_empty(n->rx_vq) ||
> - (n->mergeable_rx_bufs &&
> - !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
> - virtio_queue_set_notification(n->rx_vq, 1);
> + if (virtio_queue_empty(vq) || (n->mergeable_rx_bufs &&
> + !virtqueue_avail_bytes(vq, bufsize, 0))) {
> + virtio_queue_set_notification(vq, 1);
>
> /* To avoid a race condition where the guest has made some buffers
> * available after the above check but before notification was
> * enabled, check for available buffers again.
> */
> - if (virtio_queue_empty(n->rx_vq) ||
> - (n->mergeable_rx_bufs &&
> - !virtqueue_avail_bytes(n->rx_vq, bufsize, 0)))
> + if (virtio_queue_empty(vq) || (n->mergeable_rx_bufs &&
> + !virtqueue_avail_bytes(vq, bufsize, 0))) {
> return 0;
> + }
> }
>
> - virtio_queue_set_notification(n->rx_vq, 0);
> + virtio_queue_set_notification(vq, 0);
> return 1;
> }
>
> @@ -595,12 +701,15 @@ static int receive_filter(VirtIONet *n, const uint8_t
> *buf, int size)
>
> static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf,
> size_t size)
> {
> - VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> + int queue_index = nc->queue_index;
> + VirtIONet *n = ((NICState *)(nc->opaque))->opaque;
> + VirtQueue *vq = n->vqs[queue_index].rx_vq;
> struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
> size_t guest_hdr_len, offset, i, host_hdr_len;
>
> - if (!virtio_net_can_receive(&n->nic->nc))
> + if (!virtio_net_can_receive(n->nic->ncs[queue_index])) {
> return -1;
> + }
>
> /* hdr_len refers to the header we supply to the guest */
> guest_hdr_len = n->mergeable_rx_bufs ?
> @@ -608,7 +717,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc,
> const uint8_t *buf, size_
>
>
> host_hdr_len = n->has_vnet_hdr ? sizeof(struct virtio_net_hdr) : 0;
> - if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len))
> + if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len, vq))
> return 0;
>
> if (!receive_filter(n, buf, size))
> @@ -623,7 +732,7 @@ static ssize_t virtio_net_receive(VLANClientState *nc,
> const uint8_t *buf, size_
>
> total = 0;
>
> - if (virtqueue_pop(n->rx_vq, &elem) == 0) {
> + if (virtqueue_pop(vq, &elem) == 0) {
> if (i == 0)
> return -1;
> error_report("virtio-net unexpected empty queue: "
> @@ -675,47 +784,50 @@ static ssize_t virtio_net_receive(VLANClientState *nc,
> const uint8_t *buf, size_
> }
>
> /* signal other side */
> - virtqueue_fill(n->rx_vq, &elem, total, i++);
> + virtqueue_fill(vq, &elem, total, i++);
> }
>
> if (mhdr) {
> stw_p(&mhdr->num_buffers, i);
> }
>
> - virtqueue_flush(n->rx_vq, i);
> - virtio_notify(&n->vdev, n->rx_vq);
> + virtqueue_flush(vq, i);
> + virtio_notify(&n->vdev, vq);
>
> return size;
> }
>
> -static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
> +static int32_t virtio_net_flush_tx(VirtIONet *n, VirtIONetQueue *tvq);
>
> static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
> {
> - VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> + VirtIONet *n = ((NICState *)nc->opaque)->opaque;
> + VirtIONetQueue *netq = &n->vqs[nc->queue_index];
>
> - virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
> - virtio_notify(&n->vdev, n->tx_vq);
> + virtqueue_push(netq->tx_vq, &netq->async_tx.elem, netq->async_tx.len);
> + virtio_notify(&n->vdev, netq->tx_vq);
>
> - n->async_tx.elem.out_num = n->async_tx.len = 0;
> + netq->async_tx.elem.out_num = netq->async_tx.len;
>
> - virtio_queue_set_notification(n->tx_vq, 1);
> - virtio_net_flush_tx(n, n->tx_vq);
> + virtio_queue_set_notification(netq->tx_vq, 1);
> + virtio_net_flush_tx(n, netq);
> }
>
> /* TX */
> -static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
> +static int32_t virtio_net_flush_tx(VirtIONet *n, VirtIONetQueue *netq)
> {
> VirtQueueElement elem;
> int32_t num_packets = 0;
> + VirtQueue *vq = netq->tx_vq;
> +
> if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
> return num_packets;
> }
>
> assert(n->vdev.vm_running);
>
> - if (n->async_tx.elem.out_num) {
> - virtio_queue_set_notification(n->tx_vq, 0);
> + if (netq->async_tx.elem.out_num) {
> + virtio_queue_set_notification(vq, 0);
> return num_packets;
> }
>
> @@ -747,12 +859,12 @@ static int32_t virtio_net_flush_tx(VirtIONet *n,
> VirtQueue *vq)
> len += hdr_len;
> }
>
> - ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
> - virtio_net_tx_complete);
> + ret = qemu_sendv_packet_async(n->nic->ncs[vq_get_pair_index(n, vq)],
> + out_sg, out_num,
> virtio_net_tx_complete);
> if (ret == 0) {
> - virtio_queue_set_notification(n->tx_vq, 0);
> - n->async_tx.elem = elem;
> - n->async_tx.len = len;
> + virtio_queue_set_notification(vq, 0);
> + netq->async_tx.elem = elem;
> + netq->async_tx.len = len;
> return -EBUSY;
> }
>
> @@ -771,22 +883,23 @@ static int32_t virtio_net_flush_tx(VirtIONet *n,
> VirtQueue *vq)
> static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
> {
> VirtIONet *n = to_virtio_net(vdev);
> + VirtIONetQueue *netq = &n->vqs[vq_get_pair_index(n, vq)];
>
> /* This happens when device was stopped but VCPU wasn't. */
> if (!n->vdev.vm_running) {
> - n->tx_waiting = 1;
> + netq->tx_waiting = 1;
> return;
> }
>
> - if (n->tx_waiting) {
> + if (netq->tx_waiting) {
> virtio_queue_set_notification(vq, 1);
> - qemu_del_timer(n->tx_timer);
> - n->tx_waiting = 0;
> - virtio_net_flush_tx(n, vq);
> + qemu_del_timer(netq->tx_timer);
> + netq->tx_waiting = 0;
> + virtio_net_flush_tx(n, netq);
> } else {
> - qemu_mod_timer(n->tx_timer,
> - qemu_get_clock_ns(vm_clock) + n->tx_timeout);
> - n->tx_waiting = 1;
> + qemu_mod_timer(netq->tx_timer,
> + qemu_get_clock_ns(vm_clock) + netq->tx_timeout);
> + netq->tx_waiting = 1;
> virtio_queue_set_notification(vq, 0);
> }
> }
> @@ -794,48 +907,53 @@ static void virtio_net_handle_tx_timer(VirtIODevice
> *vdev, VirtQueue *vq)
> static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
> {
> VirtIONet *n = to_virtio_net(vdev);
> + VirtIONetQueue *netq = &n->vqs[vq_get_pair_index(n, vq)];
>
> - if (unlikely(n->tx_waiting)) {
> + if (unlikely(netq->tx_waiting)) {
> return;
> }
> - n->tx_waiting = 1;
> + netq->tx_waiting = 1;
> /* This happens when device was stopped but VCPU wasn't. */
> if (!n->vdev.vm_running) {
> return;
> }
> virtio_queue_set_notification(vq, 0);
> - qemu_bh_schedule(n->tx_bh);
> + qemu_bh_schedule(netq->tx_bh);
> }
>
> static void virtio_net_tx_timer(void *opaque)
> {
> - VirtIONet *n = opaque;
> + VirtIONetQueue *netq = opaque;
> + VirtIONet *n = netq->n;
> +
> assert(n->vdev.vm_running);
>
> - n->tx_waiting = 0;
> + netq->tx_waiting = 0;
>
> /* Just in case the driver is not ready on more */
> if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
> return;
>
> - virtio_queue_set_notification(n->tx_vq, 1);
> - virtio_net_flush_tx(n, n->tx_vq);
> + virtio_queue_set_notification(netq->tx_vq, 1);
> + virtio_net_flush_tx(n, netq);
> }
>
> static void virtio_net_tx_bh(void *opaque)
> {
> - VirtIONet *n = opaque;
> + VirtIONetQueue *netq = opaque;
> + VirtQueue *vq = netq->tx_vq;
> + VirtIONet *n = netq->n;
> int32_t ret;
>
> assert(n->vdev.vm_running);
>
> - n->tx_waiting = 0;
> + netq->tx_waiting = 0;
>
> /* Just in case the driver is not ready on more */
> if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
> return;
>
> - ret = virtio_net_flush_tx(n, n->tx_vq);
> + ret = virtio_net_flush_tx(n, netq);
> if (ret == -EBUSY) {
> return; /* Notification re-enable handled by tx_complete */
> }
> @@ -843,33 +961,39 @@ static void virtio_net_tx_bh(void *opaque)
> /* If we flush a full burst of packets, assume there are
> * more coming and immediately reschedule */
> if (ret >= n->tx_burst) {
> - qemu_bh_schedule(n->tx_bh);
> - n->tx_waiting = 1;
> + qemu_bh_schedule(netq->tx_bh);
> + netq->tx_waiting = 1;
> return;
> }
>
> /* If less than a full burst, re-enable notification and flush
> * anything that may have come in while we weren't looking. If
> * we find something, assume the guest is still active and reschedule */
> - virtio_queue_set_notification(n->tx_vq, 1);
> - if (virtio_net_flush_tx(n, n->tx_vq) > 0) {
> - virtio_queue_set_notification(n->tx_vq, 0);
> - qemu_bh_schedule(n->tx_bh);
> - n->tx_waiting = 1;
> + virtio_queue_set_notification(vq, 1);
> + if (virtio_net_flush_tx(n, netq) > 0) {
> + virtio_queue_set_notification(vq, 0);
> + qemu_bh_schedule(netq->tx_bh);
> + netq->tx_waiting = 1;
> }
> }
>
> static void virtio_net_save(QEMUFile *f, void *opaque)
> {
> VirtIONet *n = opaque;
> + int i;
>
> /* At this point, backend must be stopped, otherwise
> * it might keep writing to memory. */
> - assert(!n->vhost_started);
> + for (i = 0; i < n->queues; i++) {
> + assert(!n->vqs[i].vhost_started);
> + }
> virtio_save(&n->vdev, f);
>
> qemu_put_buffer(f, n->mac, ETH_ALEN);
> - qemu_put_be32(f, n->tx_waiting);
> + qemu_put_be32(f, n->queues);
> + for (i = 0; i < n->queues; i++) {
> + qemu_put_be32(f, n->vqs[i].tx_waiting);
> + }
> qemu_put_be32(f, n->mergeable_rx_bufs);
> qemu_put_be16(f, n->status);
> qemu_put_byte(f, n->promisc);
> @@ -902,7 +1026,10 @@ static int virtio_net_load(QEMUFile *f, void *opaque,
> int version_id)
> }
>
> qemu_get_buffer(f, n->mac, ETH_ALEN);
> - n->tx_waiting = qemu_get_be32(f);
> + n->queues = qemu_get_be32(f);
> + for (i = 0; i < n->queues; i++) {
> + n->vqs[i].tx_waiting = qemu_get_be32(f);
> + }
> n->mergeable_rx_bufs = qemu_get_be32(f);
>
> if (version_id >= 3)
> @@ -930,7 +1057,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque,
> int version_id)
> n->mac_table.in_use = 0;
> }
> }
> -
> +
> if (version_id >= 6)
> qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
>
> @@ -941,13 +1068,16 @@ static int virtio_net_load(QEMUFile *f, void *opaque,
> int version_id)
> }
>
> if (n->has_vnet_hdr) {
> - tap_using_vnet_hdr(n->nic->nc.peer, 1);
> - tap_set_offload(n->nic->nc.peer,
> - (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
> - (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
> - (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
> - (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN) & 1,
> - (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO) & 1);
> + for(i = 0; i < n->queues; i++) {
> + tap_using_vnet_hdr(n->nic->ncs[i]->peer, 1);
> + tap_set_offload(n->nic->ncs[i]->peer,
> + (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM)
> & 1,
> + (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4)
> & 1,
> + (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6)
> & 1,
> + (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)
> & 1,
> + (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO) &
> + 1);
> + }
> }
> }
>
> @@ -982,7 +1112,7 @@ static int virtio_net_load(QEMUFile *f, void *opaque,
> int version_id)
>
> static void virtio_net_cleanup(VLANClientState *nc)
> {
> - VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
> + VirtIONet *n = ((NICState *)nc->opaque)->opaque;
>
> n->nic = NULL;
> }
> @@ -1000,6 +1130,7 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf
> *conf,
> virtio_net_conf *net)
> {
> VirtIONet *n;
> + int i;
>
> n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
> sizeof(struct virtio_net_config),
> @@ -1012,7 +1143,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf
> *conf,
> n->vdev.bad_features = virtio_net_bad_features;
> n->vdev.reset = virtio_net_reset;
> n->vdev.set_status = virtio_net_set_status;
> - n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
>
> if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
> error_report("virtio-net: "
> @@ -1021,15 +1151,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev,
> NICConf *conf,
> error_report("Defaulting to \"bh\"");
> }
>
> - if (net->tx && !strcmp(net->tx, "timer")) {
> - n->tx_vq = virtio_add_queue(&n->vdev, 256,
> virtio_net_handle_tx_timer);
> - n->tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer, n);
> - n->tx_timeout = net->txtimer;
> - } else {
> - n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
> - n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
> - }
> - n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);
> qemu_macaddr_default_if_unset(&conf->macaddr);
> memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
> n->status = VIRTIO_NET_S_LINK_UP;
> @@ -1038,7 +1159,6 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf
> *conf,
>
> qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);
>
> - n->tx_waiting = 0;
> n->tx_burst = net->txburst;
> n->mergeable_rx_bufs = 0;
> n->promisc = 1; /* for compatibility */
> @@ -1046,6 +1166,32 @@ VirtIODevice *virtio_net_init(DeviceState *dev,
> NICConf *conf,
> n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
>
> n->vlans = g_malloc0(MAX_VLAN >> 3);
> + n->queues = conf->queues;
> +
> + /* Allocate per rx/tx vq's */
> + for (i = 0; i < n->queues; i++) {
> + n->vqs[i].rx_vq = virtio_add_queue(&n->vdev, 256,
> virtio_net_handle_rx);
> + if (net->tx && !strcmp(net->tx, "timer")) {
> + n->vqs[i].tx_vq = virtio_add_queue(&n->vdev, 256,
> + virtio_net_handle_tx_timer);
> + n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
> + virtio_net_tx_timer,
> + &n->vqs[i]);
> + n->vqs[i].tx_timeout = net->txtimer;
> + } else {
> + n->vqs[i].tx_vq = virtio_add_queue(&n->vdev, 256,
> + virtio_net_handle_tx_bh);
> + n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
> + }
> +
> + n->vqs[i].tx_waiting = 0;
> + n->vqs[i].n = n;
> +
> + if (i == 0) {
> + /* keep compatiable with spec and old guest */
> + n->ctrl_vq = virtio_add_queue(&n->vdev, 64,
> virtio_net_handle_ctrl);
> + }
> + }
>
> n->qdev = dev;
> register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
> @@ -1059,24 +1205,33 @@ VirtIODevice *virtio_net_init(DeviceState *dev,
> NICConf *conf,
> void virtio_net_exit(VirtIODevice *vdev)
> {
> VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
> + int i;
>
> /* This will stop vhost backend if appropriate. */
> virtio_net_set_status(vdev, 0);
>
> - qemu_purge_queued_packets(&n->nic->nc);
> + for (i = 0; i < n->queues; i++) {
> + qemu_purge_queued_packets(n->nic->ncs[i]);
> + }
>
> unregister_savevm(n->qdev, "virtio-net", n);
>
> g_free(n->mac_table.macs);
> g_free(n->vlans);
>
> - if (n->tx_timer) {
> - qemu_del_timer(n->tx_timer);
> - qemu_free_timer(n->tx_timer);
> - } else {
> - qemu_bh_delete(n->tx_bh);
> + for (i = 0; i < n->queues; i++) {
> + VirtIONetQueue *netq = &n->vqs[i];
> + if (netq->tx_timer) {
> + qemu_del_timer(netq->tx_timer);
> + qemu_free_timer(netq->tx_timer);
> + } else {
> + qemu_bh_delete(netq->tx_bh);
> + }
> }
>
> - qemu_del_vlan_client(&n->nic->nc);
> virtio_cleanup(&n->vdev);
> +
> + for (i = 0; i < n->queues; i++) {
> + qemu_del_vlan_client(n->nic->ncs[i]);
> + }
> }
> diff --git a/hw/virtio-net.h b/hw/virtio-net.h
> index 36aa463..b35ba5d 100644
> --- a/hw/virtio-net.h
> +++ b/hw/virtio-net.h
> @@ -44,6 +44,7 @@
> #define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */
> #define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */
> #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */
> +#define VIRTIO_NET_F_MULTIQUEUE 22
>
> #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */
>
> @@ -72,6 +73,8 @@ struct virtio_net_config
> uint8_t mac[ETH_ALEN];
> /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
> uint16_t status;
> +
> + uint16_t queues;
> } QEMU_PACKED;
>
> /* This is the first element of the scatter-gather list. If you don't
- Re: [Qemu-devel] [RFC V2 PATCH 4/4] virtio-net: add multiqueue support,
Michael S. Tsirkin <=