[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH V8 20/39] cpr: restart mode
From: |
Steve Sistare |
Subject: |
[PATCH V8 20/39] cpr: restart mode |
Date: |
Wed, 15 Jun 2022 07:52:07 -0700 |
Provide the cpr-save restart mode, which preserves the guest VM across a
restart of the qemu process. After cpr-save, the caller passes qemu
command-line arguments to cpr-exec, which directly exec's the new qemu
binary. The arguments must include -S so new qemu starts in a paused state.
The caller resumes the guest by calling cpr-load.
To use the restart mode, guest RAM must be backed by a memory-backend-file
with share=on. The '-cpr-enable restart' option causes secondary guest
ram blocks (those not specified on the command line) to be allocated by
mmap'ing a memfd. The memfd descriptors are kept open across exec, and their
values are saved in special cpr state. After exec, new qemu retrieves them from
that state and re-mmap's them. Hence guest RAM is preserved in place, albeit
with new virtual addresses in the qemu process.
The restart mode supports vfio devices and memory-backend-memfd in
subsequent patches.
cpr-exec syntax:
{ 'command': 'cpr-exec', 'data': { 'argv': [ 'str' ] } }
Add the restart mode:
{ 'enum': 'CprMode', 'data': [ 'reboot', 'restart' ] }
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
migration/cpr.c | 35 +++++++++++++++++++++++++++++++++++
qapi/cpr.json | 26 +++++++++++++++++++++++++-
qemu-options.hx | 2 +-
softmmu/physmem.c | 46 +++++++++++++++++++++++++++++++++++++++++++++-
trace-events | 1 +
5 files changed, 107 insertions(+), 3 deletions(-)
diff --git a/migration/cpr.c b/migration/cpr.c
index 1cc8738..8b3fffd 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -22,6 +22,7 @@ static int cpr_enabled_modes;
void cpr_init(int modes)
{
cpr_enabled_modes = modes;
+ /*
+ * Load previously saved cpr state; any failure is reported through
+ * error_fatal. Per the patch description this state carries the memfd
+ * values saved before exec. NOTE(review): presumably a no-op when qemu
+ * was not started via cpr-exec -- confirm against cpr_state_load().
+ */
+ cpr_state_load(&error_fatal);
}
bool cpr_enabled(CprMode mode)
@@ -153,6 +154,37 @@ err:
cpr_set_mode(CPR_MODE_NONE);
}
+/*
+ * cpr_walk_fd() callback used by qmp_cpr_exec(): calls qemu_clear_cloexec()
+ * on @fd. Per the patch description, preserved descriptors are kept open
+ * across exec. @name, @id and @opaque are unused. Always returns 0.
+ */
+static int preserve_fd(const char *name, int id, int fd, void *opaque)
+{
+ qemu_clear_cloexec(fd);
+ return 0;
+}
+
+/*
+ * cpr_walk_fd() callback used by qmp_cpr_load(): calls qemu_set_cloexec()
+ * on @fd, the counterpart of preserve_fd(). @name, @id and @opaque are
+ * unused. Always returns 0.
+ */
+static int unpreserve_fd(const char *name, int id, int fd, void *opaque)
+{
+ qemu_set_cloexec(fd);
+ return 0;
+}
+
+/*
+ * QMP handler for cpr-exec.
+ *
+ * Requires the run state to be save-vm and the current cpr mode to be
+ * restart; otherwise sets @errp and returns. On the success path it clears
+ * close-on-exec on every fd visited by cpr_walk_fd(), saves the cpr state,
+ * and requests the exec of @args.
+ *
+ * @args: argv for the new qemu process
+ * @errp: set on failure
+ */
+void qmp_cpr_exec(strList *args, Error **errp)
+{
+    int ret;
+
+    if (!runstate_check(RUN_STATE_SAVE_VM)) {
+        error_setg(errp, "runstate is not save-vm");
+        return;
+    }
+    if (cpr_get_mode() != CPR_MODE_RESTART) {
+        error_setg(errp, "cpr-exec requires cpr-save with restart mode");
+        return;
+    }
+
+    cpr_walk_fd(preserve_fd, 0);
+    if (cpr_state_save(errp)) {
+        /*
+         * No exec will follow, so restore close-on-exec rather than
+         * leaving the descriptors inheritable by unrelated children.
+         */
+        cpr_walk_fd(unpreserve_fd, 0);
+        return;
+    }
+
+    /*
+     * Keep the call outside assert(): an expression inside assert() is
+     * not evaluated when assertions are compiled out, which would
+     * silently drop the exec request.
+     */
+    ret = qemu_system_exec_request(args, errp);
+    assert(ret == 0);
+}
+
void qmp_cpr_load(const char *filename, CprMode mode, Error **errp)
{
QEMUFile *f;
@@ -189,6 +221,9 @@ void qmp_cpr_load(const char *filename, CprMode mode, Error
**errp)
goto out;
}
+ /* Set cloexec to prevent fd leaks until the next cpr-save */
+ cpr_walk_fd(unpreserve_fd, 0);
+
state = global_state_get_runstate();
if (state == RUN_STATE_RUNNING) {
vm_start();
diff --git a/qapi/cpr.json b/qapi/cpr.json
index 11c6f88..47ee4ff 100644
--- a/qapi/cpr.json
+++ b/qapi/cpr.json
@@ -15,11 +15,12 @@
# @CprMode:
#
# @reboot: checkpoint can be cpr-load'ed after a host reboot.
+# @restart: checkpoint can be cpr-load'ed after restarting qemu.
#
# Since: 7.1
##
{ 'enum': 'CprMode',
- 'data': [ 'none', 'reboot' ] }
+ 'data': [ 'none', 'reboot', 'restart' ] }
##
# @cpr-save:
@@ -38,6 +39,11 @@
# issue the quit command, reboot the system, start qemu using the same
# arguments plus -S, and issue the cpr-load command.
#
+# If @mode is 'restart', the checkpoint remains valid after restarting
+# qemu using a subsequent cpr-exec. Guest RAM must be backed by a
+# memory-backend-file with share=on.
+# To resume from the checkpoint, issue the cpr-load command.
+#
# @filename: name of checkpoint file
# @mode: @CprMode mode
#
@@ -48,6 +54,24 @@
'mode': 'CprMode' } }
##
+# @cpr-exec:
+#
+# Restart qemu by directly exec'ing @argv[0], replacing the qemu process.
+# The PID remains the same. Must be called after cpr-save restart.
+#
+# @argv[0] should be the path of a new qemu binary, or a prefix command that
+# in turn exec's the new qemu binary. The arguments must match those used
+# to initially start qemu, plus the -S option so new qemu starts in a paused
+# state.
+#
+# @argv: arguments to be passed to exec().
+#
+# Since: 7.1
+##
+{ 'command': 'cpr-exec',
+ 'data': { 'argv': [ 'str' ] } }
+
+##
# @cpr-load:
#
# Load a virtual machine from the checkpoint file @filename that was created
diff --git a/qemu-options.hx b/qemu-options.hx
index 6e51c33..1b49360 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -4484,7 +4484,7 @@ SRST
ERST
DEF("cpr-enable", HAS_ARG, QEMU_OPTION_cpr_enable, \
- "-cpr-enable reboot enable the cpr mode\n",
+ "-cpr-enable reboot|restart enable the cpr mode\n",
QEMU_ARCH_ALL)
SRST
``-cpr-enable reboot``
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 822c424..412cc80 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -44,6 +44,7 @@
#include "qemu/qemu-print.h"
#include "qemu/log.h"
#include "qemu/memalign.h"
+#include "qemu/memfd.h"
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
@@ -1962,6 +1963,40 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
}
}
+/*
+ * Return true if the QOM parent of @mr can be cast to TYPE_MEMORY_BACKEND,
+ * false otherwise.
+ */
+static bool memory_region_is_backend(MemoryRegion *mr)
+{
+    Object *owner = mr->parent_obj.parent;
+
+    return object_dynamic_cast(owner, TYPE_MEMORY_BACKEND) != NULL;
+}
+
+/*
+ * Allocate host memory for @rb by mapping a memfd.
+ *
+ * The memfd is looked up by the memory region's name via cpr_find_memfd().
+ * If one is found, the block's used_length and max_length and the region's
+ * alignment are taken from the saved values. Otherwise a new memfd of
+ * max_length + alignment bytes is created and recorded with
+ * cpr_save_memfd(). Either way the block is marked RAM_SHARED, the fd gets
+ * close-on-exec set, and the memory is mapped with file_ram_alloc().
+ *
+ * @rb: ram block being populated
+ * @maxlen: overwritten on both paths before it is used
+ * @errp: passed to qemu_memfd_create() and file_ram_alloc()
+ *
+ * Returns NULL if the memfd cannot be created, else the result of
+ * file_ram_alloc().
+ *
+ * NOTE(review): if file_ram_alloc() fails, nothing here closes the memfd
+ * or removes its saved cpr entry -- confirm whether that is handled
+ * elsewhere.
+ */
+static void *qemu_anon_memfd_alloc(RAMBlock *rb, size_t maxlen, Error **errp)
+{
+    MemoryRegion *region = rb->mr;
+    const char *name = memory_region_name(region);
+    size_t used, alignment;
+    void *host;
+    int fd;
+
+    fd = cpr_find_memfd(name, &used, &maxlen, &alignment);
+    if (fd < 0) {
+        /* Nothing preserved under this name: create and record a memfd. */
+        used = rb->used_length;
+        maxlen = rb->max_length;
+        region->align = QEMU_VMALLOC_ALIGN;
+        fd = qemu_memfd_create(name, maxlen + region->align, 0, 0, 0, errp);
+        if (fd < 0) {
+            return NULL;
+        }
+        cpr_save_memfd(name, fd, used, maxlen, region->align);
+    } else {
+        /* Reuse the preserved memfd and adopt its saved geometry. */
+        rb->used_length = used;
+        rb->max_length = maxlen;
+        region->align = alignment;
+    }
+
+    rb->flags |= RAM_SHARED;
+    qemu_set_cloexec(fd);
+    host = file_ram_alloc(rb, maxlen, fd, false, false, 0, errp);
+    trace_anon_memfd_alloc(name, maxlen, host, fd);
+    return host;
+}
+
static void ram_block_add(RAMBlock *new_block, Error **errp)
{
const bool noreserve = qemu_ram_is_noreserve(new_block);
@@ -1986,6 +2021,14 @@ static void ram_block_add(RAMBlock *new_block, Error
**errp)
qemu_mutex_unlock_ramlist();
return;
}
+        } else if (cpr_enabled(CPR_MODE_RESTART) &&
+                   !memory_region_is_backend(new_block->mr)) {
+            /*
+             * Restart mode: back blocks that have no memory backend with a
+             * memfd instead of anonymous memory.
+             */
+            new_block->host = qemu_anon_memfd_alloc(new_block,
+                                                    new_block->max_length,
+                                                    errp);
+            if (!new_block->host) {
+                /*
+                 * The sibling error paths in this function release the
+                 * ramlist lock before returning; do the same here so a
+                 * failed allocation does not return with it held.
+                 */
+                qemu_mutex_unlock_ramlist();
+                return;
+            }
} else {
new_block->host = qemu_anon_ram_alloc(new_block->max_length,
&new_block->mr->align,
@@ -1997,8 +2040,8 @@ static void ram_block_add(RAMBlock *new_block, Error
**errp)
qemu_mutex_unlock_ramlist();
return;
}
- memory_try_enable_merging(new_block->host, new_block->max_length);
}
+ memory_try_enable_merging(new_block->host, new_block->max_length);
}
new_ram_size = MAX(old_ram_size,
@@ -2231,6 +2274,7 @@ void qemu_ram_free(RAMBlock *block)
}
qemu_mutex_lock_ramlist();
+ cpr_delete_memfd(memory_region_name(block->mr));
QLIST_REMOVE_RCU(block, next);
ram_list.mru_block = NULL;
/* Write list before version */
diff --git a/trace-events b/trace-events
index bc71006..07369bb 100644
--- a/trace-events
+++ b/trace-events
@@ -45,6 +45,7 @@ ram_block_discard_range(const char *rbname, void *hva, size_t
length, bool need_
# accel/tcg/cputlb.c
memory_notdirty_write_access(uint64_t vaddr, uint64_t ram_addr, unsigned size)
"0x%" PRIx64 " ram_addr 0x%" PRIx64 " size %u"
memory_notdirty_set_dirty(uint64_t vaddr) "0x%" PRIx64
+anon_memfd_alloc(const char *name, size_t size, void *ptr, int fd) "%s size
%zu ptr %p fd %d"
# gdbstub.c
gdbstub_op_start(const char *device) "Starting gdbstub using device %s"
--
1.8.3.1
- Re: [PATCH V8 02/39] migration: qemu file wrappers, (continued)
- [PATCH V8 08/39] cpr: blockers, Steve Sistare, 2022/06/15
- [PATCH V8 10/39] cpr: cpr-enable option, Steve Sistare, 2022/06/15
- [PATCH V8 12/39] memory: flat section iterator, Steve Sistare, 2022/06/15
- [PATCH V8 09/39] cpr: register blockers, Steve Sistare, 2022/06/15
- [PATCH V8 11/39] cpr: save ram blocks, Steve Sistare, 2022/06/15
- [PATCH V8 13/39] oslib: qemu_clear_cloexec, Steve Sistare, 2022/06/15
- [PATCH V8 25/39] cpr: notifiers, Steve Sistare, 2022/06/15
- [PATCH V8 20/39] cpr: restart mode,
Steve Sistare <=
- [PATCH V8 29/39] vfio-pci: cpr part 3 (intx), Steve Sistare, 2022/06/15
- [PATCH V8 17/39] qapi: strList unit tests, Steve Sistare, 2022/06/15
- [PATCH V8 14/39] qapi: strList_from_string, Steve Sistare, 2022/06/15
- [PATCH V8 18/39] vl: helper to request re-exec, Steve Sistare, 2022/06/15
- [PATCH V8 22/39] cpr: ram block blockers, Steve Sistare, 2022/06/15
- [PATCH V8 23/39] hostmem-memfd: cpr for memory-backend-memfd, Steve Sistare, 2022/06/15
- [PATCH V8 19/39] cpr: preserve extra state, Steve Sistare, 2022/06/15