qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v6 5/8] Add domain socket communication for vhos


From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH v6 5/8] Add domain socket communication for vhost-user backend
Date: Tue, 14 Jan 2014 13:10:35 +0200

On Mon, Jan 13, 2014 at 03:25:16PM +0100, Antonios Motakis wrote:
> Add structures for passing vhost-user messages over a unix domain socket.
> This is the equivalent of the existing vhost-kernel ioctls.
> 
> Connect to the named unix domain socket. The system call sendmsg
> is used for communication. To be able to pass file descriptors
> between processes - we use SCM_RIGHTS type in the message control header.
> 
> Signed-off-by: Antonios Motakis <address@hidden>
> Signed-off-by: Nikolay Nikolaev <address@hidden>

Not all comments in v5 review of this file have been
addressed.
I think if a comment was wrong it's a good idea to
add clarification in code on why it is, so future readers
aren't confused.

In particular I think always explicitly opening
domain sockets and forcing a specific client/server
model is a problem.

I think we also want the protocol documented in docs/
so future users don't have to read the code to connect
to qemu.

Can you add a unit test for this code?

We ship pxe ROM that enables virtio net,
so you can write a small stub that works as the
remote side of this protocol, poke at
guest memory to see that it has
the expected content (compare to what we get with
readl).

In theory you could also get and put some buffers
though that's not a must.


> ---
>  hw/virtio/vhost-backend.c | 306 
> +++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 301 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c
> index 1d83e1d..460ee02 100644
> --- a/hw/virtio/vhost-backend.c
> +++ b/hw/virtio/vhost-backend.c
> @@ -11,34 +11,330 @@
>  #include "hw/virtio/vhost.h"
>  #include "hw/virtio/vhost-backend.h"
>  #include "qemu/error-report.h"
> +#include "qemu/sockets.h"
>  
>  #include <fcntl.h>
>  #include <unistd.h>
>  #include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
> +#include <linux/vhost.h>
> +
> +#define VHOST_MEMORY_MAX_NREGIONS    8
> +#define VHOST_USER_SOCKTO            (300) /* msec */
> +
> +typedef enum VhostUserRequest {
> +    VHOST_USER_NONE = 0,
> +    VHOST_USER_GET_FEATURES = 1,
> +    VHOST_USER_SET_FEATURES = 2,
> +    VHOST_USER_SET_OWNER = 3,
> +    VHOST_USER_RESET_OWNER = 4,
> +    VHOST_USER_SET_MEM_TABLE = 5,
> +    VHOST_USER_SET_LOG_BASE = 6,
> +    VHOST_USER_SET_LOG_FD = 7,
> +    VHOST_USER_SET_VRING_NUM = 8,
> +    VHOST_USER_SET_VRING_ADDR = 9,
> +    VHOST_USER_SET_VRING_BASE = 10,
> +    VHOST_USER_GET_VRING_BASE = 11,
> +    VHOST_USER_SET_VRING_KICK = 12,
> +    VHOST_USER_SET_VRING_CALL = 13,
> +    VHOST_USER_SET_VRING_ERR = 14,
> +    VHOST_USER_NET_SET_BACKEND = 15,
> +    VHOST_USER_ECHO = 16,
> +    VHOST_USER_MAX
> +} VhostUserRequest;
> +
> +typedef struct VhostUserMemoryRegion {
> +    uint64_t guest_phys_addr;
> +    uint64_t memory_size;
> +    uint64_t userspace_addr;
> +} VhostUserMemoryRegion;
> +
> +typedef struct VhostUserMemory {
> +    uint32_t nregions;
> +    uint32_t padding;
> +    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> +} VhostUserMemory;
> +
> +typedef struct VhostUserMsg {
> +    VhostUserRequest request;
> +
> +#define VHOST_USER_VERSION_MASK     (0x3)
> +#define VHOST_USER_REPLY_MASK       (0x1<<2)
> +    uint32_t flags;
> +    uint32_t size; /* the following payload size */
> +    union {
> +        uint64_t u64;
> +        struct vhost_vring_state state;
> +        struct vhost_vring_addr addr;
> +        VhostUserMemory memory;
> +    };
> +} QEMU_PACKED VhostUserMsg;
> +
> +static VhostUserMsg m __attribute__ ((unused));
> +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
> +                            + sizeof(m.flags) \
> +                            + sizeof(m.size))
> +
> +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)
> +
> +/* The version of the protocol we support */
> +#define VHOST_USER_VERSION    (0x1)
> +
> +static int vhost_user_recv(int fd, VhostUserMsg *msg)
> +{
> +    ssize_t r;
> +    uint8_t *p = (uint8_t *) msg;
> +
> +    /* read the header */
> +    do {
> +        r = read(fd, p, VHOST_USER_HDR_SIZE);
> +    } while (r < 0 && errno == EINTR);
> +
> +    if (r < 0) {
> +        error_report("Failed to read msg header, reason: %s\n",
> +                     strerror(errno));
> +        goto fail;
> +    }
> +
> +    if (r != VHOST_USER_HDR_SIZE) {
> +        error_report("Failed to read msg header. Read %zu instead of %zu.\n",
> +                     r, VHOST_USER_HDR_SIZE);
> +        goto fail;
> +    }
> +
> +    /* validate received flags */
> +    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
> +        error_report("Failed to read msg header."
> +                     " Flags 0x%x instead of 0x%x.\n",
> +                     msg->flags, VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
> +        goto fail;
> +    }
> +
> +    /* validate message size is sane */
> +    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
> +        error_report("Failed to read msg header."
> +                     " Size %d exceeds the maximum %zu.\n",
> +                     msg->size, VHOST_USER_PAYLOAD_SIZE);
> +        goto fail;
> +    }
> +
> +    p += VHOST_USER_HDR_SIZE;
> +
> +    /* read the payload */
> +    do {
> +        r = read(fd, p, msg->size);
> +    } while (r < 0 && errno == EINTR);
> +
> +    if (r < 0) {
> +        error_report("Failed to read msg payload, reason: %s\n",
> +                     strerror(errno));
> +        goto fail;
> +    }
> +
> +    if (r != msg->size) {
> +        error_report("Failed to read msg payload. Read %zu instead of %d.\n",
> +                     r, msg->size);
> +        goto fail;
> +    }
> +
> +    return 0;
> +
> +fail:
> +    return -1;
> +}
> +
> +static int vhost_user_send_fds(int fd, VhostUserMsg *msg, int *fds,
> +        size_t fd_num)
> +{
> +    int r;
> +
> +    struct msghdr msgh;
> +    struct iovec iov;
> +
> +    size_t fd_size = fd_num * sizeof(int);
> +    char control[CMSG_SPACE(fd_size)];
> +    struct cmsghdr *cmsg;
> +
> +    memset(&msgh, 0, sizeof(msgh));
> +    memset(control, 0, sizeof(control));
> +
> +    /* set the payload */
> +    iov.iov_base = msg;
> +    iov.iov_len = VHOST_USER_HDR_SIZE + msg->size;
> +
> +    msgh.msg_iov = &iov;
> +    msgh.msg_iovlen = 1;
> +
> +    if (fd_num) {
> +        msgh.msg_control = control;
> +        msgh.msg_controllen = sizeof(control);
> +
> +        cmsg = CMSG_FIRSTHDR(&msgh);
> +
> +        cmsg->cmsg_len = CMSG_LEN(fd_size);
> +        cmsg->cmsg_level = SOL_SOCKET;
> +        cmsg->cmsg_type = SCM_RIGHTS;
> +        memcpy(CMSG_DATA(cmsg), fds, fd_size);
> +    } else {
> +        msgh.msg_control = 0;
> +        msgh.msg_controllen = 0;
> +    }
> +
> +    do {
> +        r = sendmsg(fd, &msgh, 0);
> +    } while (r < 0 && errno == EINTR);
> +
> +    if (r < 0) {
> +        error_report("Failed to send msg(%d), reason: %s\n",
> +                msg->request, strerror(errno));
> +        goto fail;
> +    }
> +
> +    if (r != VHOST_USER_HDR_SIZE + msg->size) {
> +        error_report("Failed to send msg(%d). Sent %d instead of %zu.\n",
> +                msg->request, r, VHOST_USER_HDR_SIZE + msg->size);
> +        goto fail;
> +    }
> +
> +    return 0;
> +
> +fail:
> +    return -1;
> +}
> +
> +static int vhost_user_echo(struct vhost_dev *dev)
> +{
> +    VhostUserMsg msg;
> +    int fd = dev->control;
> +
> +    if (fd < 0) {
> +        return 0;
> +    }
> +
> +    /* check connection */
> +    msg.request = VHOST_USER_ECHO;
> +    msg.flags = VHOST_USER_VERSION;
> +    msg.size = 0;
> +
> +    if (vhost_user_send_fds(fd, &msg, 0, 0) < 0) {
> +        error_report("ECHO failed\n");
> +        return -1;
> +    }
> +
> +    if (vhost_user_recv(fd, &msg) < 0) {
> +        error_report("ECHO failed\n");
> +        return -1;
> +    }
> +
> +    return 0;
> +}
>  
>  static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
>          void *arg)
>  {
> +    int fd = dev->control;
> +    VhostUserMsg msg;
> +    int result = 0;
> +    int fds[VHOST_MEMORY_MAX_NREGIONS];
> +    size_t fd_num = 0;
> +
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    error_report("vhost_user_call not implemented\n");
>  
> -    return -1;
> +    if (fd < 0) {
> +        return 0;
> +    }
> +
> +    msg.request = VHOST_USER_NONE;
> +    msg.flags = VHOST_USER_VERSION;
> +    msg.size = 0;
> +
> +    switch (request) {
> +    default:
> +        error_report("vhost-user trying to send unhandled ioctl\n");
> +        return -1;
> +        break;
> +    }
> +
> +    result = vhost_user_send_fds(fd, &msg, fds, fd_num);
> +
> +    return result;
>  }
>  
>  static int vhost_user_init(struct vhost_dev *dev, const char *devpath)
>  {
> +    int fd = -1;
> +    struct sockaddr_un un;
> +    struct timeval tv;
> +    size_t len;
> +
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    error_report("vhost_user_init not implemented\n");
>  
> +    /* Create the socket */
> +    fd = socket(AF_UNIX, SOCK_STREAM, 0);
> +    if (fd == -1) {
> +        error_report("socket: %s", strerror(errno));
> +        return -1;
> +    }
> +
> +    /* Set socket options */
> +    tv.tv_sec = VHOST_USER_SOCKTO / 1000;
> +    tv.tv_usec = (VHOST_USER_SOCKTO % 1000) * 1000;
> +
> +    if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(struct timeval))
> +            == -1) {
> +        error_report("setsockopt SO_SNDTIMEO: %s", strerror(errno));
> +        goto fail;
> +    }
> +
> +    if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval))
> +            == -1) {
> +        error_report("setsockopt SO_RCVTIMEO: %s", strerror(errno));
> +        goto fail;
> +    }
> +
> +    /* Connect */
> +    un.sun_family = AF_UNIX;
> +    strcpy(un.sun_path, devpath);
> +
> +    len = sizeof(un.sun_family) + strlen(devpath);
> +
> +    if (connect(fd, (struct sockaddr *) &un, len) == -1) {
> +        error_report("connect: %s", strerror(errno));
> +        goto fail;
> +    }
> +
> +    /* Cleanup if there is previous connection left */
> +    if (dev->control >= 0) {
> +        dev->vhost_ops->vhost_backend_cleanup(dev);
> +    }
> +    dev->control = fd;
> +
> +    if (vhost_user_echo(dev) < 0) {
> +        dev->control = -1;
> +        goto fail;
> +    }
> +
> +    return fd;
> +
> +fail:
> +    close(fd);
>      return -1;
> +
>  }
>  
>  static int vhost_user_cleanup(struct vhost_dev *dev)
>  {
> +    int r = 0;
> +
>      assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> -    error_report("vhost_user_cleanup not implemented\n");
>  
> -    return -1;
> +    if (dev->control >= 0) {
> +        r = close(dev->control);
> +    }
> +    dev->control = -1;
> +
> +    return r;
>  }
>  
>  static const VhostOps user_ops = {
> -- 
> 1.8.3.2
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]