qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 1/1] exec: make -mem-path filenames deterministic


From: Peter Feiner
Subject: [Qemu-devel] [PATCH 1/1] exec: make -mem-path filenames deterministic
Date: Tue, 20 Nov 2012 07:10:24 -0500

This patch makes the -mem-path filenames deterministic and allows some control
over how QEMU mmaps the files. Given this control, QEMU can be used to implement
exogenous memory management techniques quite simply. Two examples follow:

    1. Post-copy migration (mmap=shared for origin, save device state, path in
       NFS available on both hosts, mmap=shared for destination).
    2. memory sharing via VM cloning (mmap=shared for master, save device state,
       then mmap=private for clones).

These new features are exposed via arguments to -mem-path:

-mem-path \
    path[,mode=temp|open|create][,mmap=private|shared][,nofallback][,anyfs]

The backing file names are made deterministic by including their RAMBlocks'
offsets, which are unique given a qemu command line. Note that Xen's live
migration relies on RAMBlocks having the same offsets between QEMU processes
(see xen_read_physmap). The new file name format is

    qemu_back_mem.OFFSET+SIZE.NAME[.RANDOM]

where SIZE and NAME are added for extra sanity checking when mode="open".

By default, the old -mem-path behavior is preserved. I.e., mode="temp" is used,
which adds a random suffix to the deterministic name and unlinks the files.

Signed-off-by: Peter Feiner <address@hidden>
---
 cpu-all.h       |    5 ++++
 exec.c          |   60 ++++++++++++++++++++++++++++++++++--------------------
 qemu-config.c   |   26 +++++++++++++++++++++++
 qemu-options.hx |   24 +++++++++++++++++++--
 vl.c            |   43 +++++++++++++++++++++++++++++++++++++-
 5 files changed, 131 insertions(+), 27 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index c9c51b8..a83f71e 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -500,6 +500,11 @@ typedef struct RAMList {
 extern RAMList ram_list;
 
 extern const char *mem_path;
+extern int mem_temp;
+extern int mem_create;
+extern int mem_fallback;
+extern int mem_shared;
+extern int mem_hugetlbfs;
 extern int mem_prealloc;
 
 /* Flags stored in the low bits of the TLB virtual address.  These are
diff --git a/exec.c b/exec.c
index 8435de0..75b146c 100644
--- a/exec.c
+++ b/exec.c
@@ -2346,7 +2346,7 @@ void qemu_flush_coalesced_mmio_buffer(void)
 
 #define HUGETLBFS_MAGIC       0x958458f6
 
-static long gethugepagesize(const char *path)
+static long getfspagesize(const char *path, int want_hugetlbfs)
 {
     struct statfs fs;
     int ret;
@@ -2360,7 +2360,7 @@ static long gethugepagesize(const char *path)
         return 0;
     }
 
-    if (fs.f_type != HUGETLBFS_MAGIC)
+    if (want_hugetlbfs && fs.f_type != HUGETLBFS_MAGIC)
         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
 
     return fs.f_bsize;
@@ -2373,17 +2373,15 @@ static void *file_ram_alloc(RAMBlock *block,
     char *filename;
     void *area;
     int fd;
-#ifdef MAP_POPULATE
     int flags;
-#endif
-    unsigned long hpagesize;
+    unsigned long pagesize;
 
-    hpagesize = gethugepagesize(path);
-    if (!hpagesize) {
+    pagesize = getfspagesize(path, mem_hugetlbfs);
+    if (!pagesize) {
         return NULL;
     }
 
-    if (memory < hpagesize) {
+    if (memory < pagesize) {
         return NULL;
     }
 
@@ -2392,20 +2390,35 @@ static void *file_ram_alloc(RAMBlock *block,
         return NULL;
     }
 
-    if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
+    if (asprintf(&filename, "%s/qemu_back_mem.%"PRIx64"+%"PRIx64".%s",
+                 path, block->offset, memory, block->mr->name) == -1) {
         return NULL;
     }
 
-    fd = mkstemp(filename);
+    if (mem_temp) {
+        if (asprintf(&filename, "%s.XXXXXX", filename) == -1) {
+            return NULL;
+        }
+        fd = mkstemp(filename);
+    } else {
+        flags = O_RDWR | O_CLOEXEC | (mem_create ? O_CREAT : 0);
+        fd = open(filename, flags, 0600);
+    }
+
     if (fd < 0) {
-        perror("unable to create backing store for hugepages");
+        fprintf(stderr, "unable to create backing store %s "
+                "for guest memory: %s\n", filename, strerror(errno));
         free(filename);
         return NULL;
     }
-    unlink(filename);
+
+    if (mem_temp) {
+        unlink(filename);
+    }
+
     free(filename);
 
-    memory = (memory+hpagesize-1) & ~(hpagesize-1);
+    memory = (memory+pagesize-1) & ~(pagesize-1);
 
     /*
      * ftruncate is not supported by hugetlbfs in older
@@ -2416,16 +2429,16 @@ static void *file_ram_alloc(RAMBlock *block,
     if (ftruncate(fd, memory))
         perror("ftruncate");
 
+    flags = mem_shared ? MAP_SHARED : MAP_PRIVATE;
 #ifdef MAP_POPULATE
     /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
-     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
+     * MAP_PRIVATE is requested.  For mem_prealloc we always mmap as MAP_SHARED
      * to sidestep this quirk.
      */
-    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
-    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
-#else
-    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+    if (mem_prealloc)
+        flags = MAP_POPULATE | MAP_SHARED;
 #endif
+    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
     if (area == MAP_FAILED) {
         perror("file_ram_alloc: can't mmap RAM pages");
         close(fd);
@@ -2561,6 +2574,10 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void 
*host,
 #if defined (__linux__) && !defined(TARGET_S390X)
             new_block->host = file_ram_alloc(new_block, size, mem_path);
             if (!new_block->host) {
+                if (!mem_fallback) {
+                    fprintf(stderr, "failed to allocate ram file for %s\n", 
mr->name);
+                    exit(1);
+                }
                 new_block->host = qemu_vmalloc(size);
                 memory_try_enable_merging(new_block->host, size);
             }
@@ -2675,11 +2692,10 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
                 if (mem_path) {
 #if defined(__linux__) && !defined(TARGET_S390X)
                     if (block->fd) {
+                        flags = mem_shared ? MAP_SHARED : MAP_PRIVATE;
 #ifdef MAP_POPULATE
-                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
-                            MAP_PRIVATE;
-#else
-                        flags |= MAP_PRIVATE;
+                        if (mem_prealloc)
+                            flags = MAP_POPULATE | MAP_SHARED;
 #endif
                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                     flags, block->fd, offset);
diff --git a/qemu-config.c b/qemu-config.c
index 10d1ba4..45b24d2 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -691,6 +691,31 @@ static QemuOptsList qemu_object_opts = {
     },
 };
 
+static QemuOptsList qemu_mempath_opts = {
+    .name = "mempath",
+    .implied_opt_name = "path",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_mempath_opts.head),
+    .desc = {
+        {
+            .name = "path",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "mmap",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "mode",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "anyfs",
+            .type = QEMU_OPT_BOOL,
+        },{
+            .name = "fallback",
+            .type = QEMU_OPT_BOOL,
+        },
+        { /* end of list */ }
+    },
+};
+
 static QemuOptsList *vm_config_groups[32] = {
     &qemu_drive_opts,
     &qemu_chardev_opts,
@@ -709,6 +734,7 @@ static QemuOptsList *vm_config_groups[32] = {
     &qemu_sandbox_opts,
     &qemu_add_fd_opts,
     &qemu_object_opts,
+    &qemu_mempath_opts,
     NULL,
 };
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 9bb29d3..8972397 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -443,10 +443,28 @@ gigabytes respectively.
 ETEXI
 
 DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
-    "-mem-path FILE  provide backing storage for guest RAM\n", QEMU_ARCH_ALL)
+    "-mem-path 
path[,mmap=private|shared][,mode=temp|open|create][,nofallback][,anyfs]\n"
+    "          provide backing storage for guest RAM\n",
+    QEMU_ARCH_ALL)
 STEXI
address@hidden -mem-path @var{path}
-Allocate guest RAM from a temporarily created file in @var{path}.
address@hidden -mem-path 
@var{path}[,mmap=private|shared][,mode=temp|open|create][,nofallback][,anyfs]
+Allocate guest RAM from files in @var{path}.
address@hidden mmap=private|shared
+Specifies how backing files are mmap'd. "private" indicates a COW mapping, thus
+leaving the underlying file unchanged. "shared" indicates a write-through
+mapping, thus reflecting the guest's memory in the underlying file. Default is
+private.
address@hidden mode=temp|open|create
+Determines how backing files are created and opened. "temp" indicates that
+unique temporary files are created in @var{path} and unlinked after opening.
+Both "create" and "open" indicate that deterministic names are used and the
+files aren't unlinked. The The difference between "create" and "open" is that
+"open" expects the files to already exist. Default is temp.
address@hidden nofallback
+Fail if backing files cannot be used for guest RAM (e.g., permission error, bad
+path). By default, RAM allocation falls back to normal method.
address@hidden anyfs
+Suppressing warnings if @var{path} isn't on a hugetlbfs filesystem.
 ETEXI
 
 #ifdef MAP_POPULATE
diff --git a/vl.c b/vl.c
index c8e9c78..3b7cea7 100644
--- a/vl.c
+++ b/vl.c
@@ -185,6 +185,11 @@ static int display_remote;
 const char* keyboard_layout = NULL;
 ram_addr_t ram_size;
 const char *mem_path = NULL;
+int mem_shared = 0;
+int mem_hugetlbfs = 1;
+int mem_temp = 1;
+int mem_create = 1;
+int mem_fallback = 1;
 #ifdef MAP_POPULATE
 int mem_prealloc = 0; /* force preallocation of physical target memory */
 #endif
@@ -2938,9 +2943,43 @@ int main(int argc, char **argv, char **envp)
                 }
                 break;
             }
-            case QEMU_OPTION_mempath:
-                mem_path = optarg;
+            case QEMU_OPTION_mempath: {
+                const char *value;
+                opts = qemu_opts_parse(qemu_find_opts("mempath"), optarg, 1);
+                if (!opts) {
+                    exit(1);
+                }
+
+                mem_path = qemu_opt_get(opts, "path");
+                mem_hugetlbfs = !qemu_opt_get_bool(opts, "anyfs", 0);
+                mem_fallback = qemu_opt_get_bool(opts, "fallback", 1);
+
+                value = qemu_opt_get(opts, "mode");
+                if (value == NULL || !strcmp(value, "temp")) {
+                    mem_temp = 1;
+                } else {
+                    mem_temp = 0;
+                    if (!strcmp(value, "create")) {
+                        mem_create = 1;
+                    } else if (!strcmp(value, "open")) {
+                        mem_create = 0;
+                    } else {
+                        fprintf(stderr, "Invalid -mem-path mode value.\n");
+                        exit(1);
+                    }
+                }
+
+                value = qemu_opt_get(opts, "mmap");
+                if (value == NULL || !strcmp(value, "private")) {
+                    mem_shared = 0;
+                } else if (!strcmp(value, "shared")) {
+                    mem_shared = 1;
+                } else {
+                    fprintf(stderr, "Invalid -mem-path mmap value.\n");
+                    exit(1);
+                }
                 break;
+            }
 #ifdef MAP_POPULATE
             case QEMU_OPTION_mem_prealloc:
                 mem_prealloc = 1;
-- 
1.7.5.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]