[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v1] vhost-user: fix not send all hugepage files
From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH v1] vhost-user: fix not send all hugepage files to vhost-user
Date: Wed, 4 Feb 2015 14:52:00 +0100
On Mon, Feb 02, 2015 at 12:29:47AM +0100, Paolo Bonzini wrote:
>
>
> On 17/12/2014 07:02, address@hidden wrote:
> > From: linhaifeng <address@hidden>
> >
> > If we create VM with two or more numa nodes qemu will create two
> > or more hugepage files but qemu only send one hugepage file fd
> > to vhost-user when VM's memory size is 2G and with two numa nodes.
> >
> > Signed-off-by: linhaifeng <address@hidden>
>
> The bug is in vhost_dev_assign_memory. It doesn't check that the file
> descriptor matches when merging regions. Michael, does the merging
> trigger in practice? Can we just eliminate it?
>
> Paolo
I'm not sure: does memory core ever give us two adjacent
RAM segments that we *can* merge?
If yes it would trigger, and extra memory slots slow down lookups
linearly so they aren't free.
> > ---
> > hw/virtio/vhost-user.c | 78
> > ++++++++++++++++++++++++++++++---------------
> > hw/virtio/vhost.c | 13 ++++++++
> > linux-headers/linux/vhost.h | 7 ++++
> > 3 files changed, 73 insertions(+), 25 deletions(-)
> >
> > diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> > index aefe0bb..439cbba 100644
> > --- a/hw/virtio/vhost-user.c
> > +++ b/hw/virtio/vhost-user.c
> > @@ -24,6 +24,10 @@
> > #include <linux/vhost.h>
> >
> > #define VHOST_MEMORY_MAX_NREGIONS 8
> > +/* FIXME: same as the max number of NUMA nodes? */
> > +#define HUGEPAGE_MAX_FILES 8
> > +
> > +#define RAM_SHARED (1 << 1)
> >
> > typedef enum VhostUserRequest {
> > VHOST_USER_NONE = 0,
> > @@ -41,14 +45,15 @@ typedef enum VhostUserRequest {
> > VHOST_USER_SET_VRING_KICK = 12,
> > VHOST_USER_SET_VRING_CALL = 13,
> > VHOST_USER_SET_VRING_ERR = 14,
> > - VHOST_USER_MAX
> > + VHOST_USER_MMAP_HUGEPAGE_FILE = 15,
> > + VHOST_USER_UNMAP_HUGEPAGE_FILE = 16,
> > + VHOST_USER_MAX,
> > } VhostUserRequest;
> >
> > typedef struct VhostUserMemoryRegion {
> > uint64_t guest_phys_addr;
> > uint64_t memory_size;
> > uint64_t userspace_addr;
> > - uint64_t mmap_offset;
> > } VhostUserMemoryRegion;
> >
> > typedef struct VhostUserMemory {
> > @@ -57,6 +62,16 @@ typedef struct VhostUserMemory {
> > VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> > } VhostUserMemory;
> >
> > +typedef struct HugepageMemoryInfo {
> > + uint64_t base_addr;
> > + uint64_t size;
> > +}HugeMemInfo;
> > +
> > +typedef struct HugepageInfo {
> > + uint32_t num;
> > + HugeMemInfo files[HUGEPAGE_MAX_FILES];
> > +}HugepageInfo;
> > +
> > typedef struct VhostUserMsg {
> > VhostUserRequest request;
> >
> > @@ -71,6 +86,7 @@ typedef struct VhostUserMsg {
> > struct vhost_vring_state state;
> > struct vhost_vring_addr addr;
> > VhostUserMemory memory;
> > + HugepageInfo huge_info;
> > };
> > } QEMU_PACKED VhostUserMsg;
> >
> > @@ -104,7 +120,9 @@ static unsigned long int
> > ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
> > VHOST_GET_VRING_BASE, /* VHOST_USER_GET_VRING_BASE */
> > VHOST_SET_VRING_KICK, /* VHOST_USER_SET_VRING_KICK */
> > VHOST_SET_VRING_CALL, /* VHOST_USER_SET_VRING_CALL */
> > - VHOST_SET_VRING_ERR /* VHOST_USER_SET_VRING_ERR */
> > + VHOST_SET_VRING_ERR, /* VHOST_USER_SET_VRING_ERR */
> > + VHOST_MMAP_HUGEPAGE_FILE, /* VHOST_USER_MMAP_HUGEPAGE_FILE */
> > + VHOST_UNMAP_HUGEPAGE_FILE, /* VHOST_USER_UNMAP_HUGEPAGE_FILE */
> > };
> >
> > static VhostUserRequest vhost_user_request_translate(unsigned long int
> > request)
> > @@ -190,6 +208,7 @@ static int vhost_user_call(struct vhost_dev *dev,
> > unsigned long int request,
> > int fds[VHOST_MEMORY_MAX_NREGIONS];
> > int i, fd;
> > size_t fd_num = 0;
> > + RAMBlock *block;
> >
> > assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);
> >
> > @@ -213,37 +232,46 @@ static int vhost_user_call(struct vhost_dev *dev,
> > unsigned long int request,
> > case VHOST_RESET_OWNER:
> > break;
> >
> > - case VHOST_SET_MEM_TABLE:
> > - for (i = 0; i < dev->mem->nregions; ++i) {
> > - struct vhost_memory_region *reg = dev->mem->regions + i;
> > - ram_addr_t ram_addr;
> > + case VHOST_MMAP_HUGEPAGE_FILE:
> > + qemu_mutex_lock_ramlist();
> >
> > - assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
> > - qemu_ram_addr_from_host((void
> > *)(uintptr_t)reg->userspace_addr, &ram_addr);
> > - fd = qemu_get_ram_fd(ram_addr);
> > - if (fd > 0) {
> > - msg.memory.regions[fd_num].userspace_addr =
> > reg->userspace_addr;
> > - msg.memory.regions[fd_num].memory_size = reg->memory_size;
> > - msg.memory.regions[fd_num].guest_phys_addr =
> > reg->guest_phys_addr;
> > - msg.memory.regions[fd_num].mmap_offset =
> > reg->userspace_addr -
> > - (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
> > - assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
> > - fds[fd_num++] = fd;
> > +        /* Get hugepage file information */
> > + QTAILQ_FOREACH(block, &ram_list.blocks, next) {
> > + if (block->flags & RAM_SHARED && block->fd > 0) {
> > + msg.huge_info.files[fd_num].size = block->length;
> > + msg.huge_info.files[fd_num].base_addr = block->host;
> > + fds[fd_num++] = block->fd;
> > }
> > }
> > + msg.huge_info.num = fd_num;
> >
> > - msg.memory.nregions = fd_num;
> > + /* Calculate msg size */
> > + msg.size = sizeof(m.huge_info.num);
> > + msg.size += fd_num * sizeof(HugeMemInfo);
> > +
> > + qemu_mutex_unlock_ramlist();
> > + break;
> >
> > - if (!fd_num) {
> > - error_report("Failed initializing vhost-user memory map\n"
> > - "consider using -object memory-backend-file
> > share=on\n");
> > - return -1;
> > + case VHOST_UNMAP_HUGEPAGE_FILE:
> > + /* Tell vhost-user to unmap all hugepage files. */
> > + break;
> > +
> > + case VHOST_SET_MEM_TABLE:
> > + for (i = 0; i < dev->mem->nregions; i++) {
> > + struct vhost_memory_region *reg = dev->mem->regions + i;
> > +
> > + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
> > +
> > + msg.memory.regions[i].userspace_addr = reg->userspace_addr;
> > + msg.memory.regions[i].memory_size = reg->memory_size;
> > + msg.memory.regions[i].guest_phys_addr = reg->guest_phys_addr;
> > + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
> > }
> >
> > + msg.memory.nregions = i;
> > msg.size = sizeof(m.memory.nregions);
> > msg.size += sizeof(m.memory.padding);
> > - msg.size += fd_num * sizeof(VhostUserMemoryRegion);
> > -
> > + msg.size += i * sizeof(VhostUserMemoryRegion);
> > break;
> >
> > case VHOST_SET_LOG_FD:
> > diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> > index 5a12861..b8eb341 100644
> > --- a/hw/virtio/vhost.c
> > +++ b/hw/virtio/vhost.c
> > @@ -1041,6 +1041,14 @@ int vhost_dev_start(struct vhost_dev *hdev,
> > VirtIODevice *vdev)
> > if (r < 0) {
> > goto fail_features;
> > }
> > + if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
> > + r = hdev->vhost_ops->vhost_call(hdev, VHOST_MMAP_HUGEPAGE_FILE,
> > + NULL);
> > + if (r < 0) {
> > + r = -errno;
> > + goto fail_mem;
> > + }
> > + }
> > r = hdev->vhost_ops->vhost_call(hdev, VHOST_SET_MEM_TABLE, hdev->mem);
> > if (r < 0) {
> > r = -errno;
> > @@ -1101,5 +1109,10 @@ void vhost_dev_stop(struct vhost_dev *hdev,
> > VirtIODevice *vdev)
> > g_free(hdev->log);
> > hdev->log = NULL;
> > hdev->log_size = 0;
> > +
> > +    if (hdev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER) {
> > +        (void)hdev->vhost_ops->vhost_call(hdev, VHOST_UNMAP_HUGEPAGE_FILE,
> > +                                          NULL);
> > +    }
> > }
> >
> > diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h
> > index c656f61..bb72811 100644
> > --- a/linux-headers/linux/vhost.h
> > +++ b/linux-headers/linux/vhost.h
> > @@ -113,6 +113,13 @@ struct vhost_memory {
> > /* Set eventfd to signal an error */
> > #define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct
> > vhost_vring_file)
> >
> > +/* Tell vhost-user to mmap hugepage file */
> > +#define VHOST_MMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x23, int)
> > +/* Tell vhost-user to unmap hugepage file */
> > +#define VHOST_UNMAP_HUGEPAGE_FILE _IOW(VHOST_VIRTIO, 0x24, int)
> > +
> > +#define VHOST_THREAD_ID _IOR(VHOST_VIRTIO, 0x25, struct vhost_vring_thread)
> > +
> > /* VHOST_NET specific defines */
> >
> > /* Attach virtio net ring to a raw socket, or tap device.
> >