From: Peter Xu
Subject: Re: [Qemu-devel] [RFC v2 16/32] vhost+postcopy: Send address back to qemu
Date: Tue, 29 Aug 2017 16:30:03 +0800
User-agent: Mutt/1.5.24 (2015-08-30)

On Thu, Aug 24, 2017 at 08:27:14PM +0100, Dr. David Alan Gilbert (git) wrote:
> From: "Dr. David Alan Gilbert" <address@hidden>
> 
> We need a better way, but at the moment we need the address of the
> mappings sent back to qemu so it can interpret the messages on the
> userfaultfd it reads.
> 
> Note: We don't ask for the default 'ack' reply since we've got our own.
> 
> Signed-off-by: Dr. David Alan Gilbert <address@hidden>
> ---
>  contrib/libvhost-user/libvhost-user.c | 15 ++++++++-
>  docs/interop/vhost-user.txt           |  6 ++++
>  hw/virtio/trace-events                |  1 +
>  hw/virtio/vhost-user.c                | 57 ++++++++++++++++++++++++++++++++++-
>  4 files changed, 77 insertions(+), 2 deletions(-)
> 
> diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
> index e6ab059a03..5ec54f7d60 100644
> --- a/contrib/libvhost-user/libvhost-user.c
> +++ b/contrib/libvhost-user/libvhost-user.c
> @@ -477,13 +477,26 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
>              DPRINT("%s: region %d: Registered userfault for %llx + %llx\n",
>                      __func__, i, reg_struct.range.start, reg_struct.range.len);
>              /* TODO: Stash 'zero' support flags somewhere */
> -            /* TODO: Get address back to QEMU */
>  
> +            /* TODO: We need to find a way for the qemu not to see the virtual
> +             * addresses of the clients, so as to keep better separation.
> +             */
> +            /* Return the address to QEMU so that it can translate the ufd
> +             * fault addresses back.
> +             */
> +            msg_region->userspace_addr = (uintptr_t)(mmap_addr +
> +                                                     dev_region->mmap_offset);
>          }
>  
>          close(vmsg->fds[i]);
>      }
>  
> +    if (dev->postcopy_listening) {
> +        /* Need to return the addresses - send the updated message back */
> +        vmsg->fd_num = 0;
> +        return true;
> +    }
> +
>      return false;
>  }
>  
> diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> index 73c3dd74db..b2a548c94d 100644
> --- a/docs/interop/vhost-user.txt
> +++ b/docs/interop/vhost-user.txt
> @@ -413,12 +413,18 @@ Master message types
>        Id: 5
>        Equivalent ioctl: VHOST_SET_MEM_TABLE
>        Master payload: memory regions description
> +      Slave payload: (postcopy only) memory regions description
>  
>        Sets the memory map regions on the slave so it can translate the vring
>        addresses. In the ancillary data there is an array of file descriptors
>        for each memory mapped region. The size and ordering of the fds matches
>        the number and ordering of memory regions.
>  
> +      When postcopy-listening has been received, SET_MEM_TABLE replies with
> +      the bases of the memory mapped regions to the master.  It must have mmap'd
> +      the regions and enabled userfaultfd on them.  Note NEED_REPLY_MASK
> +      is not set in this case.
> +
>   * VHOST_USER_SET_LOG_BASE
>  
>        Id: 6
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index f736c7c84f..63fd4a79cf 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -2,6 +2,7 @@
>  
>  # hw/virtio/vhost-user.c
>  vhost_user_postcopy_listen(void) ""
> +vhost_user_set_mem_table_postcopy(uint64_t client_addr, uint64_t qhva, int reply_i, int region_i) "client:0x%"PRIx64" for hva: 0x%"PRIx64" reply %d region %d"
>  
>  # hw/virtio/virtio.c
>  virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 9178271ab2..2e4eb0864a 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -19,6 +19,7 @@
>  #include "qemu/sockets.h"
>  #include "migration/migration.h"
>  #include "migration/postcopy-ram.h"
> +#include "trace.h"
>  
>  #include <sys/ioctl.h>
>  #include <sys/socket.h>
> @@ -133,6 +134,7 @@ struct vhost_user {
>      int slave_fd;
>      NotifierWithReturn postcopy_notifier;
>      struct PostCopyFD  postcopy_fd;
> +    uint64_t           postcopy_client_bases[VHOST_MEMORY_MAX_NREGIONS];
>  };
>  
>  static bool ioeventfd_enabled(void)
> @@ -300,11 +302,13 @@ static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
>  static int vhost_user_set_mem_table(struct vhost_dev *dev,
>                                      struct vhost_memory *mem)
>  {
> +    struct vhost_user *u = dev->opaque;
>      int fds[VHOST_MEMORY_MAX_NREGIONS];
>      int i, fd;
>      size_t fd_num = 0;
>      bool reply_supported = virtio_has_feature(dev->protocol_features,
> -                                              VHOST_USER_PROTOCOL_F_REPLY_ACK);
> +                                          VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
> +                           !u->postcopy_fd.handler;

(indentation of the continuation lines looks off here)
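
Something like this perhaps, keeping the flag aligned with the opening
parenthesis (just a style sketch of the same condition, untested):

    bool reply_supported = virtio_has_feature(dev->protocol_features,
                                              VHOST_USER_PROTOCOL_F_REPLY_ACK) &&
                           !u->postcopy_fd.handler;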

>  
>      VhostUserMsg msg = {
>          .request = VHOST_USER_SET_MEM_TABLE,
> @@ -350,6 +354,57 @@ static int vhost_user_set_mem_table(struct vhost_dev *dev,
>          return -1;
>      }
>  
> +    if (u->postcopy_fd.handler) {

It seems that after this handler is set, we never clean it up.  Do we
need to unset it somewhere? (maybe vhost_user_postcopy_end?)
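
Maybe something like this rough sketch (I'm assuming the unset would go
into a vhost_user_postcopy_end hook later in the series; the exact
teardown point may well differ):

static int vhost_user_postcopy_end(struct vhost_dev *dev)
{
    struct vhost_user *u = dev->opaque;

    /* ... existing postcopy teardown ... */

    /* Drop the handler so later SET_MEM_TABLE calls take the normal
     * (no postcopy reply) path again. */
    u->postcopy_fd.handler = NULL;

    return 0;
}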

> +        VhostUserMsg msg_reply;
> +        int region_i, reply_i;
> +        if (vhost_user_read(dev, &msg_reply) < 0) {
> +            return -1;
> +        }
> +
> +        if (msg_reply.request != VHOST_USER_SET_MEM_TABLE) {
> +            error_report("%s: Received unexpected msg type."
> +                         "Expected %d received %d", __func__,
> +                         VHOST_USER_SET_MEM_TABLE, msg_reply.request);
> +            return -1;
> +        }
> +        /* We're using the same structure, just reusing one of the
> +         * fields, so it should be the same size.
> +         */
> +        if (msg_reply.size != msg.size) {
> +            error_report("%s: Unexpected size for postcopy reply "
> +                         "%d vs %d", __func__, msg_reply.size, msg.size);
> +            return -1;
> +        }
> +
> +        memset(u->postcopy_client_bases, 0,
> +               sizeof(uint64_t) * VHOST_MEMORY_MAX_NREGIONS);
> +
> +        /* They're in the same order as the regions that were sent
> +         * but some of the regions were skipped (above) if they
> +         * didn't have fd's
> +        */
> +        for (reply_i = 0, region_i = 0;
> +             region_i < dev->mem->nregions;
> +             region_i++) {
> +            if (reply_i < fd_num &&
> +                msg_reply.payload.memory.regions[region_i].guest_phys_addr ==
                                                    ^^^^^^^^
                                          should this be reply_i?

(And maybe we can use pointers for the regions for better readability?)
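
I.e. something along these lines (untested sketch combining both points;
'reg' and 'reply_reg' are hypothetical locals):

        for (reply_i = 0, region_i = 0;
             region_i < dev->mem->nregions;
             region_i++) {
            struct vhost_memory_region *reg = &dev->mem->regions[region_i];

            if (reply_i < fd_num) {
                VhostUserMemoryRegion *reply_reg =
                    &msg_reply.payload.memory.regions[reply_i];

                /* Reply entries are in send order, minus the regions
                 * skipped for having no fd, so index the reply by
                 * reply_i rather than region_i. */
                if (reply_reg->guest_phys_addr == reg->guest_phys_addr) {
                    u->postcopy_client_bases[region_i] =
                        reply_reg->userspace_addr;
                    reply_i++;
                }
            }
        }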

> +                dev->mem->regions[region_i].guest_phys_addr) {
> +                u->postcopy_client_bases[region_i] =
> +                    msg_reply.payload.memory.regions[reply_i].userspace_addr;
> +                trace_vhost_user_set_mem_table_postcopy(
> +                    msg_reply.payload.memory.regions[reply_i].userspace_addr,
> +                    msg.payload.memory.regions[reply_i].userspace_addr,
> +                    reply_i, region_i);
> +                reply_i++;
> +            }
> +        }
> +        if (reply_i != fd_num) {
> +            error_report("%s: postcopy reply not fully consumed "
> +                         "%d vs %zd",
> +                         __func__, reply_i, fd_num);
> +            return -1;
> +        }
> +    }
>      if (reply_supported) {
>          return process_message_reply(dev, &msg);
>      }
> -- 
> 2.13.5
> 

-- 
Peter Xu


