qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v3 1/6] migration: Allow immutable device state to be migrated early (i.e., before RAM)


From: David Hildenbrand
Subject: [PATCH v3 1/6] migration: Allow immutable device state to be migrated early (i.e., before RAM)
Date: Thu, 22 Dec 2022 12:02:10 +0100

For virtio-mem, we want to have the plugged/unplugged state of memory
blocks available before migrating any actual RAM content. This
information is immutable on the migration source while migration is active.

For example, we want to use this information for proper preallocation
support with migration: currently, we don't preallocate memory on the
migration target, and especially with hugetlb, we can easily run out of
hugetlb pages during RAM migration and will crash (SIGBUS) instead of
catching this gracefully via preallocation.

Migrating device state before we start iterating is currently impossible.
Introduce and use qemu_savevm_state_start_precopy(), and use
a new special migration priority -- MIG_PRI_POST_SETUP -- to decide whether
state will be saved in qemu_savevm_state_start_precopy() or in
qemu_savevm_state_complete_precopy_*().

We have to take care of properly including the early device state in the
vmdesc. Relying on migrate_get_current() to temporarily store the vmdesc is
a bit sub-optimal, but we use that explicitly or implicitly all over the
place already, so this barely matters in practice.

Note that only very selected devices (i.e., ones seriously messing with
RAM setup) are supposed to make use of that.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 include/migration/vmstate.h |   7 +++
 migration/migration.c       |  13 +++++
 migration/migration.h       |   4 ++
 migration/savevm.c          | 104 +++++++++++++++++++++++++++---------
 migration/savevm.h          |   1 +
 5 files changed, 104 insertions(+), 25 deletions(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index ad24aa1934..79eb2409a2 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -156,6 +156,13 @@ typedef enum {
     MIG_PRI_VIRTIO_MEM,         /* Must happen before IOMMU */
     MIG_PRI_GICV3_ITS,          /* Must happen before PCI devices */
     MIG_PRI_GICV3,              /* Must happen before the ITS */
+    /*
+     * Must happen before all other devices (iterable and non-iterable),
+     * especially before migrating RAM content. Such device state must be
+     * guaranteed to be immutable on the migration source until migration
+     * ends and must not depend on the CPU state to be synchronized.
+     */
+    MIG_PRI_POST_SETUP,
     MIG_PRI_MAX,
 } MigrationPriority;
 
diff --git a/migration/migration.c b/migration/migration.c
index 52b5d39244..78b6bb8765 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2170,6 +2170,9 @@ void migrate_init(MigrationState *s)
     s->vm_was_running = false;
     s->iteration_initial_bytes = 0;
     s->threshold_size = 0;
+
+    json_writer_free(s->vmdesc);
+    s->vmdesc = NULL;
 }
 
 int migrate_add_blocker_internal(Error *reason, Error **errp)
@@ -3997,6 +4000,9 @@ static void *migration_thread(void *opaque)
 
     trace_migration_thread_setup_complete();
 
+    /* Process early data that has to get migrated before iterating. */
+    qemu_savevm_state_start_precopy(s->to_dst_file);
+
     while (migration_is_active(s)) {
         if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
             MigIterateState iter_state = migration_iteration_run(s);
@@ -4125,6 +4131,12 @@ static void *bg_migration_thread(void *opaque)
     if (vm_stop_force_state(RUN_STATE_PAUSED)) {
         goto fail;
     }
+
+    /* Migrate early data that would usually get migrated before iterating. */
+    if (qemu_savevm_state_start_precopy(fb)) {
+        goto fail;
+    }
+
     /*
      * Put vCPUs in sync with shadow context structures, then
      * save their state to channel-buffer along with devices.
@@ -4445,6 +4457,7 @@ static void migration_instance_finalize(Object *obj)
     qemu_sem_destroy(&ms->rp_state.rp_sem);
     qemu_sem_destroy(&ms->postcopy_qemufile_src_sem);
     error_free(ms->error);
+    json_writer_free(ms->vmdesc);
 }
 
 static void migration_instance_init(Object *obj)
diff --git a/migration/migration.h b/migration/migration.h
index ae4ffd3454..66511ce532 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -17,6 +17,7 @@
 #include "exec/cpu-common.h"
 #include "hw/qdev-core.h"
 #include "qapi/qapi-types-migration.h"
+#include "qapi/qmp/json-writer.h"
 #include "qemu/thread.h"
 #include "qemu/coroutine_int.h"
 #include "io/channel.h"
@@ -366,6 +367,9 @@ struct MigrationState {
      * This save hostname when out-going migration starts
      */
     char *hostname;
+
+    /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */
+    JSONWriter *vmdesc;
 };
 
 void migrate_set_state(int *state, int old_state, int new_state);
diff --git a/migration/savevm.c b/migration/savevm.c
index a0cdb714f7..b810409574 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -42,7 +42,6 @@
 #include "postcopy-ram.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-migration.h"
-#include "qapi/qmp/json-writer.h"
 #include "qapi/clone-visitor.h"
 #include "qapi/qapi-builtin-visit.h"
 #include "qapi/qmp/qerror.h"
@@ -1325,6 +1324,71 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
     qemu_fflush(f);
 }
 
+static int qemu_savevm_state_precopy_one_non_iterable(SaveStateEntry *se,
+                                                      QEMUFile *f,
+                                                      JSONWriter *vmdesc)
+{
+    int ret;
+
+    if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
+        return 0;
+    }
+    if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+        trace_savevm_section_skip(se->idstr, se->section_id);
+        return 0;
+    }
+
+    trace_savevm_section_start(se->idstr, se->section_id);
+
+    json_writer_start_object(vmdesc, NULL);
+    json_writer_str(vmdesc, "name", se->idstr);
+    json_writer_int64(vmdesc, "instance_id", se->instance_id);
+
+    save_section_header(f, se, QEMU_VM_SECTION_FULL);
+    ret = vmstate_save(f, se, vmdesc);
+    if (ret) {
+        qemu_file_set_error(f, ret);
+        return ret;
+    }
+    trace_savevm_section_end(se->idstr, se->section_id, 0);
+    save_section_footer(f, se);
+
+    json_writer_end_object(vmdesc);
+    return 0;
+}
+
+int qemu_savevm_state_start_precopy(QEMUFile *f)
+{
+    MigrationState *ms = migrate_get_current();
+    JSONWriter *vmdesc;
+    SaveStateEntry *se;
+    int ret;
+
+    assert(!ms->vmdesc);
+    ms->vmdesc = vmdesc = json_writer_new(false);
+    json_writer_start_object(vmdesc, NULL);
+    json_writer_int64(vmdesc, "page_size", qemu_target_page_size());
+    json_writer_start_array(vmdesc, "devices");
+
+    /*
+     * Only immutable non-iterable device state is expected to be saved this
+     * early. All remaining (ordinary) non-iterable device state will be saved
+     * in qemu_savevm_state_complete_precopy_non_iterable().
+     */
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (save_state_priority(se) < MIG_PRI_POST_SETUP) {
+            continue;
+        }
+
+        ret = qemu_savevm_state_precopy_one_non_iterable(se, f, vmdesc);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
 static
 int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
 {
@@ -1364,41 +1428,24 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
                                                     bool in_postcopy,
                                                     bool inactivate_disks)
 {
-    g_autoptr(JSONWriter) vmdesc = NULL;
+    MigrationState *ms = migrate_get_current();
+    JSONWriter *vmdesc = ms->vmdesc;
     int vmdesc_len;
     SaveStateEntry *se;
     int ret;
 
-    vmdesc = json_writer_new(false);
-    json_writer_start_object(vmdesc, NULL);
-    json_writer_int64(vmdesc, "page_size", qemu_target_page_size());
-    json_writer_start_array(vmdesc, "devices");
-    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+    /* qemu_savevm_state_start_precopy() is expected to be called first. */
+    assert(vmdesc);
 
-        if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
-            continue;
-        }
-        if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
-            trace_savevm_section_skip(se->idstr, se->section_id);
+    QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
+        if (save_state_priority(se) >= MIG_PRI_POST_SETUP) {
             continue;
         }
 
-        trace_savevm_section_start(se->idstr, se->section_id);
-
-        json_writer_start_object(vmdesc, NULL);
-        json_writer_str(vmdesc, "name", se->idstr);
-        json_writer_int64(vmdesc, "instance_id", se->instance_id);
-
-        save_section_header(f, se, QEMU_VM_SECTION_FULL);
-        ret = vmstate_save(f, se, vmdesc);
+        ret = qemu_savevm_state_precopy_one_non_iterable(se, f, vmdesc);
         if (ret) {
-            qemu_file_set_error(f, ret);
             return ret;
         }
-        trace_savevm_section_end(se->idstr, se->section_id, 0);
-        save_section_footer(f, se);
-
-        json_writer_end_object(vmdesc);
     }
 
     if (inactivate_disks) {
@@ -1427,6 +1474,10 @@ int 
qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         qemu_put_buffer(f, (uint8_t *)json_writer_get(vmdesc), vmdesc_len);
     }
 
+    /* Free it now to detect any inconsistencies. */
+    json_writer_free(vmdesc);
+    ms->vmdesc = NULL;
+
     return 0;
 }
 
@@ -1541,6 +1592,9 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
     qemu_savevm_state_setup(f);
     qemu_mutex_lock_iothread();
 
+    /* Process early data that has to get migrated before iterating. */
+    qemu_savevm_state_start_precopy(f);
+
     while (qemu_file_get_error(f) == 0) {
         if (qemu_savevm_state_iterate(f, false) > 0) {
             break;
diff --git a/migration/savevm.h b/migration/savevm.h
index 6461342cb4..323bd5ab3b 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -38,6 +38,7 @@ void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
 void qemu_savevm_state_cleanup(void);
 void qemu_savevm_state_complete_postcopy(QEMUFile *f);
+int qemu_savevm_state_start_precopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
                                        bool inactivate_disks);
 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
-- 
2.38.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]