qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC PATCH for 8.0 10/13] virtio-net: Migrate vhost inflight descrip


From: Eugenio Perez Martin
Subject: Re: [RFC PATCH for 8.0 10/13] virtio-net: Migrate vhost inflight descriptors
Date: Wed, 7 Dec 2022 09:40:02 +0100

On Mon, Dec 5, 2022 at 9:52 PM Parav Pandit <parav@nvidia.com> wrote:
>
>
> > From: Eugenio Pérez <eperezma@redhat.com>
> > Sent: Monday, December 5, 2022 12:05 PM
> >
> > There is currently no data to be migrated, since nothing populates or read
> > the fields on virtio-net.
> >
> > The migration of in-flight descriptors is modelled after the migration of
> > requests in virtio-blk. With some differences:
> > * virtio-blk migrates queue number on each request. Here we only add a
> >   vq if it has descriptors to migrate, and then we make all descriptors
> >   in an array.
> > * Use of QTAILQ since it works similar to signal the end of the inflight
> >   descriptors: 1 for more data, 0 if end. But do it for each vq instead
> >   of for each descriptor.
> > * Usage of VMState macros.
> >
> > The fields of descriptors would be way more complicated if we use the
> > VirtQueueElements directly, since there would be a few levels of
> > indirections. Using VirtQueueElementOld for the moment, and migrate to
> > VirtQueueElement for the final patch.
> >
> > TODO: Proper migration versioning
> > TODO: Do not embed vhost-vdpa structs
> > TODO: Migrate the VirtQueueElement, not VirtQueueElementOld.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >  include/hw/virtio/virtio-net.h |   2 +
> >  include/migration/vmstate.h    |  11 +++
> >  hw/net/virtio-net.c            | 129 +++++++++++++++++++++++++++++++++
> >  3 files changed, 142 insertions(+)
> >
> > diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
> > index ef234ffe7e..ae7c017ef0 100644
> > --- a/include/hw/virtio/virtio-net.h
> > +++ b/include/hw/virtio/virtio-net.h
> > @@ -151,9 +151,11 @@ typedef struct VirtIONetQueue {
> >      QEMUTimer *tx_timer;
> >      QEMUBH *tx_bh;
> >      uint32_t tx_waiting;
> > +    uint32_t tx_inflight_num, rx_inflight_num;
> >      struct {
> >          VirtQueueElement *elem;
> >      } async_tx;
> > +    VirtQueueElement **tx_inflight, **rx_inflight;
> >      struct VirtIONet *n;
> >  } VirtIONetQueue;
> >
> > diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> > index 9726d2d09e..9e0dfef9ee 100644
> > --- a/include/migration/vmstate.h
> > +++ b/include/migration/vmstate.h
> > @@ -626,6 +626,17 @@ extern const VMStateInfo vmstate_info_qlist;
> >      .offset     = vmstate_offset_varray(_state, _field, _type),      \
> >  }
> >
> > +#define VMSTATE_STRUCT_VARRAY_ALLOC_UINT16(_field, _state,
> > _field_num,        \
> > +                                           _version, _vmsd, _type) {       
> >    \
> > +    .name       = (stringify(_field)),                                     
> >    \
> > +    .version_id = (_version),                                              
> >    \
> > +    .vmsd       = &(_vmsd),                                                
> >    \
> > +    .num_offset = vmstate_offset_value(_state, _field_num, uint16_t),      
> >    \
> > +    .size       = sizeof(_type),                                           
> >    \
> > +    .flags      = VMS_STRUCT | VMS_VARRAY_UINT16 | VMS_ALLOC |
> > VMS_POINTER,   \
> > +    .offset     = vmstate_offset_pointer(_state, _field, _type),           
> >    \
> > +}
> > +
> >  #define VMSTATE_STRUCT_VARRAY_ALLOC(_field, _state, _field_num,
> > _version, _vmsd, _type) {\
> >      .name       = (stringify(_field)),                               \
> >      .version_id = (_version),                                        \
> > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index
> > aba12759d5..ffd7bf1fc7 100644
> > --- a/hw/net/virtio-net.c
> > +++ b/hw/net/virtio-net.c
> > @@ -3077,6 +3077,13 @@ static bool mac_table_doesnt_fit(void *opaque,
> > int version_id)
> >      return !mac_table_fits(opaque, version_id);  }
> >
> > +typedef struct VirtIONetInflightQueue {
> > +    uint16_t idx;
> > +    uint16_t num;
> > +    QTAILQ_ENTRY(VirtIONetInflightQueue) entry;
> > +    VirtQueueElementOld *elems;
> > +} VirtIONetInflightQueue;
> > +
> >  /* This temporary type is shared by all the WITH_TMP methods
> >   * although only some fields are used by each.
> >   */
> > @@ -3086,6 +3093,7 @@ struct VirtIONetMigTmp {
> >      uint16_t        curr_queue_pairs_1;
> >      uint8_t         has_ufo;
> >      uint32_t        has_vnet_hdr;
> > +    QTAILQ_HEAD(, VirtIONetInflightQueue) queues_inflight;
> >  };
> >
> >  /* The 2nd and subsequent tx_waiting flags are loaded later than @@ -
> > 3231,6 +3239,124 @@ static const VMStateDescription
> > vmstate_virtio_net_rss = {
> >      },
> >  };
> >
> > +static const VMStateDescription vmstate_virtio_net_inflight_queue = {
> > +    .name      = "virtio-net-device/inflight/queue",
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_UINT16(idx, VirtIONetInflightQueue),
> > +        VMSTATE_UINT16(num, VirtIONetInflightQueue),
> > +
> > +        VMSTATE_STRUCT_VARRAY_ALLOC_UINT16(elems,
> > VirtIONetInflightQueue, num,
> > +                                           0, 
> > vmstate_virtqueue_element_old,
> > +                                           VirtQueueElementOld),
> > +        VMSTATE_END_OF_LIST()
> > +    },
> > +};
> > +
> > +static int virtio_net_inflight_init(void *opaque) {
> > +    struct VirtIONetMigTmp *tmp = opaque;
> > +
> > +    QTAILQ_INIT(&tmp->queues_inflight);
> > +    return 0;
> > +}
> > +
> > +static int virtio_net_inflight_pre_save(void *opaque) {
> > +    struct VirtIONetMigTmp *tmp = opaque;
> > +    VirtIONet *net = tmp->parent;
> > +    uint16_t curr_queue_pairs = net->multiqueue ? net->curr_queue_pairs :
> > 1;
> > +    VirtIONetInflightQueue *qi = g_new0(VirtIONetInflightQueue,
> > +                                        curr_queue_pairs * 2);
> > +
> > +    virtio_net_inflight_init(opaque);
> > +    for (uint16_t i = 0; i < curr_queue_pairs * 2; ++i) {
> > +        VirtIONetQueue *q = &net->vqs[vq2q(i)];
> > +        size_t n = i % 2 ? q->tx_inflight_num : q->rx_inflight_num;
> > +        VirtQueueElement **inflight = i % 2 ? q->tx_inflight :
> > + q->rx_inflight;
> > +
> > +        if (n == 0) {
> > +            continue;
> > +        }
> > +
> > +        qi[i].idx = i;
> > +        qi[i].num = n;
> > +        qi[i].elems = g_new0(VirtQueueElementOld, n);
> > +        for (uint16_t j = 0; j < n; ++j) {
> > +            qemu_put_virtqueue_element_old(inflight[j], &qi[i].elems[j]);
> > +        }
> > +        QTAILQ_INSERT_TAIL(&tmp->queues_inflight, &qi[i], entry);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int virtio_net_inflight_post_save(void *opaque) {
> > +    struct VirtIONetMigTmp *tmp = opaque;
> > +    VirtIONetInflightQueue *qi;
> > +
> > +    while ((qi = QTAILQ_FIRST(&tmp->queues_inflight))) {
> > +        QTAILQ_REMOVE(&tmp->queues_inflight, qi, entry);
> > +        g_free(qi->elems);
> > +        g_free(qi);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int virtio_net_inflight_post_load(void *opaque, int version_id)
> > +{
> > +    struct VirtIONetMigTmp *tmp = opaque;
> > +    VirtIONet *net = tmp->parent;
> > +    uint16_t curr_queue_pairs = net->multiqueue ? net->curr_queue_pairs :
> > 1;
> > +    VirtIONetInflightQueue *qi;
> > +
> > +    while ((qi = QTAILQ_FIRST(&tmp->queues_inflight))) {
> > +        VirtIONetQueue *q = &net->vqs[vq2q(qi->idx)];
> > +        uint32_t *n = qi->idx % 2 ? &q->tx_inflight_num : 
> > &q->rx_inflight_num;
> > +        VirtQueueElement ***inflight = qi->idx % 2 ?
> > +                                       &q->tx_inflight : &q->rx_inflight;
> > +        if (unlikely(qi->num == 0)) {
> > +            /* TODO: error message */
> > +            return -1;
> > +        }
> > +
> > +        if (unlikely(qi->idx > curr_queue_pairs * 2)) {
> > +            /* TODO: error message */
> > +            return -1;
> > +        }
> > +
> > +        *n = qi->num;
> > +        *inflight = g_new(VirtQueueElement *, *n);
> > +        for (uint16_t j = 0; j < *n; ++j) {
> > +            (*inflight)[j] = qemu_get_virtqueue_element_from_old(
> > +                &net->parent_obj, &qi->elems[j],
> > +                sizeof(VirtQueueElement));
> > +        }
> > +
> > +        QTAILQ_REMOVE(&tmp->queues_inflight, qi, entry);
> > +        g_free(qi->elems);
> > +        g_free(qi);
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +/* TODO: Allocate a temporal per queue / queue element, not all of
> > +them! */ static const VMStateDescription vmstate_virtio_net_inflight = {
> > +    .name      = "virtio-net-device/inflight",
> > +    .pre_save = virtio_net_inflight_pre_save,
> > +    .post_save = virtio_net_inflight_post_save,
> > +    .pre_load = virtio_net_inflight_init,
> > +    .post_load = virtio_net_inflight_post_load,
> > +    .fields = (VMStateField[]) {
> > +        VMSTATE_QTAILQ_V(queues_inflight, struct VirtIONetMigTmp, 0,
> > +                         vmstate_virtio_net_inflight_queue,
> > +                         VirtIONetInflightQueue, entry),
> > +        VMSTATE_END_OF_LIST()
> > +    },
> > +};
> > +
> How is the CVQ related mac, vlan, rss replay different than these inflight 
> descriptors, due to which inflights to be done by these callbacks and CVQ 
> differently?

CVQ is processed by qemu directly, so it is guaranteed they will not
be out of order. Guest memory is enough to recover in the destination.




reply via email to

[Prev in Thread] Current Thread [Next in Thread]