qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code


From: Michael S. Tsirkin
Subject: Re: [Qemu-devel] [PATCH v2] exec: factor out duplicate mmap code
Date: Thu, 8 Oct 2015 23:38:50 +0300

Any objections to merging as is, and renaming later
as appropriate?

On Sun, Oct 04, 2015 at 11:38:59AM +0300, Michael S. Tsirkin wrote:
> On Fri, Oct 02, 2015 at 10:48:13AM +1000, Richard Henderson wrote:
> > On 10/01/2015 10:58 PM, Michael S. Tsirkin wrote:
> > >Anonymous and file-backed RAM allocation are now almost exactly the same.
> > >
> > >Reduce code duplication by moving RAM mmap code out of oslib-posix.c and
> > >exec.c.
> > >
> > >Reported-by: Marc-André Lureau <address@hidden>
> > >Signed-off-by: Michael S. Tsirkin <address@hidden>
> > >Reviewed-by: Paolo Bonzini <address@hidden>
> > >Acked-by: Paolo Bonzini <address@hidden>
> > >---
> > >
> > >Changes from v1: add shared flag to get MAP_SHARED mappings
> > >(for vhost-user), only set MAP_ANONYMOUS for anonymous RAM.
> > >
> > >  include/qemu/mmap-alloc.h | 10 +++++++
> > >  exec.c                    | 47 +++++++------------------------
> > >  util/mmap-alloc.c         | 71 
> > > +++++++++++++++++++++++++++++++++++++++++++++++
> > >  util/oslib-posix.c        | 28 +++----------------
> > >  util/Makefile.objs        |  2 +-
> > >  5 files changed, 96 insertions(+), 62 deletions(-)
> > >  create mode 100644 include/qemu/mmap-alloc.h
> > >  create mode 100644 util/mmap-alloc.c
> > >
> > >diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
> > >new file mode 100644
> > >index 0000000..56388e6
> > >--- /dev/null
> > >+++ b/include/qemu/mmap-alloc.h
> > >@@ -0,0 +1,10 @@
> > >+#ifndef QEMU_MMAP_ALLOC
> > >+#define QEMU_MMAP_ALLOC
> > >+
> > >+#include "qemu-common.h"
> > >+
> > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared);
> > >+
> > >+void qemu_ram_munmap(void *ptr, size_t size);
> > >+
> > >+#endif
> > >diff --git a/exec.c b/exec.c
> > >index 7d90a52..4505dc7 100644
> > >--- a/exec.c
> > >+++ b/exec.c
> > >@@ -55,6 +55,9 @@
> > >  #include "exec/ram_addr.h"
> > >
> > >  #include "qemu/range.h"
> > >+#ifndef _WIN32
> > >+#include "qemu/mmap-alloc.h"
> > >+#endif
> > >
> > >  //#define DEBUG_SUBPAGE
> > >
> > >@@ -84,9 +87,9 @@ static MemoryRegion io_mem_unassigned;
> > >   */
> > >  #define RAM_RESIZEABLE (1 << 2)
> > >
> > >-/* An extra page is mapped on top of this RAM.
> > >+/* RAM is backed by an mmapped file.
> > >   */
> > >-#define RAM_EXTRA (1 << 3)
> > >+#define RAM_FILE (1 << 3)
> > >  #endif
> > >
> > >  struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
> > >@@ -1188,13 +1191,10 @@ static void *file_ram_alloc(RAMBlock *block,
> > >      char *filename;
> > >      char *sanitized_name;
> > >      char *c;
> > >-    void *ptr;
> > >-    void *area = NULL;
> > >+    void *area;
> > >      int fd;
> > >      uint64_t hpagesize;
> > >-    uint64_t total;
> > >      Error *local_err = NULL;
> > >-    size_t offset;
> > >
> > >      hpagesize = gethugepagesize(path, &local_err);
> > >      if (local_err) {
> > >@@ -1238,7 +1238,6 @@ static void *file_ram_alloc(RAMBlock *block,
> > >      g_free(filename);
> > >
> > >      memory = ROUND_UP(memory, hpagesize);
> > >-    total = memory + hpagesize;
> > >
> > >      /*
> > >       * ftruncate is not supported by hugetlbfs in older
> > >@@ -1250,40 +1249,14 @@ static void *file_ram_alloc(RAMBlock *block,
> > >          perror("ftruncate");
> > >      }
> > >
> > >-    ptr = mmap(0, total, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS,
> > >-                -1, 0);
> > >-    if (ptr == MAP_FAILED) {
> > >-        error_setg_errno(errp, errno,
> > >-                         "unable to allocate memory range for hugepages");
> > >-        close(fd);
> > >-        goto error;
> > >-    }
> > >-
> > >-    offset = QEMU_ALIGN_UP((uintptr_t)ptr, hpagesize) - (uintptr_t)ptr;
> > >-
> > >-    area = mmap(ptr + offset, memory, PROT_READ | PROT_WRITE,
> > >-                (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE) |
> > >-                MAP_FIXED,
> > >-                fd, 0);
> > >+    area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & 
> > >RAM_SHARED);
> > >      if (area == MAP_FAILED) {
> > >          error_setg_errno(errp, errno,
> > >                           "unable to map backing store for hugepages");
> > >-        munmap(ptr, total);
> > >          close(fd);
> > >          goto error;
> > >      }
> > >
> > >-    if (offset > 0) {
> > >-        munmap(ptr, offset);
> > >-    }
> > >-    ptr += offset;
> > >-    total -= offset;
> > >-
> > >-    if (total > memory + getpagesize()) {
> > >-        munmap(ptr + memory + getpagesize(),
> > >-               total - memory - getpagesize());
> > >-    }
> > >-
> > >      if (mem_prealloc) {
> > >          os_mem_prealloc(fd, area, memory);
> > >      }
> > >@@ -1601,7 +1574,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, 
> > >MemoryRegion *mr,
> > >      new_block->used_length = size;
> > >      new_block->max_length = size;
> > >      new_block->flags = share ? RAM_SHARED : 0;
> > >-    new_block->flags |= RAM_EXTRA;
> > >+    new_block->flags |= RAM_FILE;
> > >      new_block->host = file_ram_alloc(new_block, size,
> > >                                       mem_path, errp);
> > >      if (!new_block->host) {
> > >@@ -1703,8 +1676,8 @@ static void reclaim_ramblock(RAMBlock *block)
> > >          xen_invalidate_map_cache_entry(block->host);
> > >  #ifndef _WIN32
> > >      } else if (block->fd >= 0) {
> > >-        if (block->flags & RAM_EXTRA) {
> > >-            munmap(block->host, block->max_length + getpagesize());
> > >+        if (block->flags & RAM_FILE) {
> > >+            qemu_ram_munmap(block->host, block->max_length);
> > >          } else {
> > >              munmap(block->host, block->max_length);
> > >          }
> > >diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
> > >new file mode 100644
> > >index 0000000..e82cc94
> > >--- /dev/null
> > >+++ b/util/mmap-alloc.c
> > >@@ -0,0 +1,71 @@
> > >+/*
> > >+ * Support for RAM backed by mmaped host memory.
> > >+ *
> > >+ * Copyright (c) 2015 Red Hat, Inc.
> > >+ *
> > >+ * Authors:
> > >+ *  Michael S. Tsirkin <address@hidden>
> > >+ *
> > >+ * This work is licensed under the terms of the GNU GPL, version 2 or
> > >+ * later.  See the COPYING file in the top-level directory.
> > >+ */
> > >+#include <qemu/mmap-alloc.h>
> > >+#include <sys/types.h>
> > >+#include <sys/mman.h>
> > >+#include <assert.h>
> > >+
> > >+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
> > 
> > Do we need generic "align", or would a bool hugepage suffice?  Or I suppose
> > this is also used for non-anonymous hugepages via /dev/hugepage where we
> > have multiple huge page sizes?
> 
> Right. Further, passing in alignment makes it possible to
> keep page size probing in exec.c which is the only user.
> 
> 
> > Should we call this function something else, so that we can use it to
> > allocate hugepage aligned storage for code_gen_buffer and/or the tbs array?
> 
> 
> I don't much care what do we call it - with two callers it would be easy
> to rename down the road.
> The reason I put in "RAM" is because it allocates a guard page
> which only seems to make sense for RAM memory that can be mapped.
> 
> But if you have a better name in mind, please let me know.
> 
> 
> > >+{
> > >+    /*
> > >+     * Note: this always allocates at least one extra page of virtual 
> > >address
> > >+     * space, even if size is already aligned.
> > >+     */
> > >+    size_t total = size + align;
> > >+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, 
> > >-1, 0);
> > >+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
> > >+    void *ptr1;
> > >+
> > >+    if (ptr == MAP_FAILED) {
> > >+        return NULL;
> > >+    }
> > >+
> > >+    /* Make sure align is a power of 2 */
> > >+    assert(!(align & (align - 1)));
> > >+    /* Always align to host page size */
> > >+    assert(align >= getpagesize());
> > 
> > Is qemu_real_host_page_size initialized yet?
> > If not, should we move the initialization forward?
> 
> I think it is.
> Though qemu_real_host_page_size seems to only be needed
> for portability, and this file is posix-specific.
> 
> Anyway, we are moving code from util/oslib-posix.c and exec.c that
> currently both call getpagesize in this function, it seems better to be
> consistent.
> 
> We can refactor afterwards, correct?
> 
> 
> > >+
> > >+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
> > >+                MAP_FIXED |
> > >+                (fd == -1 ? MAP_ANONYMOUS : 0) |
> > >+                (shared ? MAP_SHARED : MAP_PRIVATE),
> > >+                fd, 0);
> > 
> > If fd == -1, we really only need mprotect on the region, correct?
> 
> Yes. But that's already like this in util/oslib-posix.c,
> this is just re-factoring without logic changes.
> 
> Any reason to prefer mprotect? That would be more LOC.
> 
> > >+    if (ptr1 == MAP_FAILED) {
> > >+        munmap(ptr, total);
> > >+        return NULL;
> > >+    }
> > >+
> > >+    ptr += offset;
> > >+    total -= offset;
> > >+
> > >+    if (offset > 0) {
> > >+        munmap(ptr - offset, offset);
> > >+    }
> > >+
> > >+    /*
> > >+     * Leave a single PROT_NONE page allocated after the RAM block, to 
> > >serve as
> > >+     * a guard page guarding against potential buffer overflows.
> > >+     */
> > >+    if (total > size + getpagesize()) {
> > >+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
> > >+    }
> > >+
> > >+    return ptr;
> > >+}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]