qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 3/6] Migration: Reconnect network in case of network failure during pc migration (destination)


From: Md Haris Iqbal
Subject: [Qemu-devel] [PATCH 3/6] Migration: Reconnect network in case of network failure during pc migration (destination)
Date: Mon, 22 Aug 2016 02:28:49 +0530

Signed-off-by: Md Haris Iqbal <address@hidden>
---
 hmp-commands.hx               | 14 ++++---
 hmp.c                         |  3 +-
 include/migration/migration.h |  3 ++
 migration/migration.c         | 97 ++++++++++++++++++++++++++++++++++++++++---
 migration/postcopy-ram.c      |  9 ++++
 migration/savevm.c            | 35 ++++++++++++----
 qapi-schema.json              |  2 +-
 qemu-version.h                |  1 +
 qmp-commands.hx               |  4 +-
 9 files changed, 145 insertions(+), 23 deletions(-)
 create mode 100644 qemu-version.h

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 8f765fd..e468c53 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -932,17 +932,19 @@ ETEXI
 
     {
         .name       = "migrate_incoming",
-        .args_type  = "uri:s",
-        .params     = "uri",
-        .help       = "Continue an incoming migration from an -incoming defer",
+        .args_type  = "recover:-r,uri:s",
+        .params     = "[-r] uri",
+        .help       = "Continue an incoming migration from an -incoming defer"
+                     "\n\t\t\t -r to recover from a broken migration",
         .mhandler.cmd = hmp_migrate_incoming,
     },
 
 STEXI
-@item migrate_incoming @var{uri}
+@item migrate_incoming [-r] @var{uri}
 @findex migrate_incoming
-Continue an incoming migration using the @var{uri} (that has the same syntax
-as the -incoming option).
+Continue an incoming migration using the @var{uri} (that has the same syntax
+as the -incoming option).
+Use -r to recover from a broken migration.
 
 ETEXI
 
diff --git a/hmp.c b/hmp.c
index 02ed457..965e4f3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1186,9 +1186,10 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
 void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
 {
     Error *err = NULL;
+    bool recover = qdict_get_try_bool(qdict, "recover", false);
     const char *uri = qdict_get_str(qdict, "uri");
 
-    qmp_migrate_incoming(uri, &err);
+    qmp_migrate_incoming(uri, !!recover, recover, &err);
 
     hmp_handle_error(mon, &err);
 }
diff --git a/include/migration/migration.h b/include/migration/migration.h
index bcaf55d..74d456e 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -82,6 +82,9 @@ typedef enum {
 struct MigrationIncomingState {
     QEMUFile *from_src_file;
 
+    /* To be used by a VM for recovery */
+    bool in_recovery;
+
     /*
      * Free at the start of the main state load, set as the main thread finishes
      * loading state.
diff --git a/migration/migration.c b/migration/migration.c
index 149cf1e..166f4f7 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -432,6 +432,8 @@ void migration_fd_process_incoming(QEMUFile *f)
 void migration_channel_process_incoming(MigrationState *s,
                                         QIOChannel *ioc)
 {
+    MigrationIncomingState *mis = migration_incoming_get_current();
+
     trace_migration_set_incoming_channel(
         ioc, object_get_typename(OBJECT(ioc)));
 
@@ -445,6 +447,19 @@ void migration_channel_process_incoming(MigrationState *s,
         }
     } else {
         QEMUFile *f = qemu_fopen_channel_input(ioc);
+
+        if (mis != NULL && atomic_mb_read(&mis->in_recovery)) {
+            mis->from_src_file = f;
+
+            qemu_mutex_lock(&migration_recovery_mutex);
+            atomic_mb_set(&mis->in_recovery, false);
+            qemu_cond_signal(&migration_recovery_cond);
+            qemu_mutex_unlock(&migration_recovery_mutex);
+
+            fprintf(stderr, "recovered\n");
+            return;
+        }
+
         migration_fd_process_incoming(f);
     }
 }
@@ -1063,19 +1078,62 @@ void migrate_del_blocker(Error *reason)
     migration_blockers = g_slist_remove(migration_blockers, reason);
 }
 
-void qmp_migrate_incoming(const char *uri, Error **errp)
+void qmp_migrate_incoming(const char *uri, bool in_recover, bool recover, Error **errp)
 {
     Error *local_err = NULL;
+    bool recovery = in_recover && recover;
     static bool once = true;
+    MigrationIncomingState *mis = migration_incoming_get_current();
 
-    if (!deferred_incoming) {
-        error_setg(errp, "For use with '-incoming defer'");
-        return;
-    }
-    if (!once) {
+    if (recovery) {
+        if (mis != NULL) {
+
+            if(!atomic_mb_read(&mis->in_recovery)) {
+                /* Recovery option was set but the VM
+                 * Does not seem to have been in recovery
+                 */
+                error_setg(errp, "No VM to recover");
+                return;
+            } else {
+                /* Recovery option was set and the VM
+                 * needs a recovery, resetting the socket
+                 * to NULL
+                 */
+                mis->from_src_file = NULL;
+                if(mis->have_fault_thread) {
+                    /* shutdown the socket to source, causing the fault_thread to shutdown */
+                    uint64_t tmp64 = 1;
+
+                    fprintf(stderr, "rp shutdown\n");
+
+                    if (write(mis->userfault_quit_fd, &tmp64, 8) != 8) {
+                        error_report("%s: incrementing userfault_quit_fd: %s",
+                            __func__, strerror(errno));
+                    }
+                    close(mis->userfault_quit_fd);
+                    close(mis->userfault_fd);
+                    mis->have_fault_thread = false;
+                }
+                fprintf(stderr, "rp after shutdown %p\n", mis->to_src_file);
+            }
+
+        } else {
+            /* Recovery option was set but there
+             * is no VM running/(in recovery)
+             */
+            error_setg(errp, "Cannot use -r option without a VM to recover");
+            return;
+        }
+    } else if (!once) {
         error_setg(errp, "The incoming migration has already been started");
     }
 
+    if (!recover && !deferred_incoming) {
+         error_setg(errp, "For use with '-incoming defer'");
+         return;
+     }
+
+
     qemu_start_incoming_migration(uri, &local_err);
 
     if (local_err) {
@@ -2007,6 +2065,33 @@ int qemu_migrate_postcopy_outgoing_recovery(MigrationState* ms)
 
 }
 
+int qemu_migrate_postcopy_incoming_recovery(QEMUFile **f,
+                                            MigrationIncomingState* mis)
+{
+    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+                                   MIGRATION_STATUS_POSTCOPY_RECOVERY);
+
+    atomic_mb_set(&mis->in_recovery, true);
+    /* Code for network recovery to be added here */
+    qemu_mutex_lock(&migration_recovery_mutex);
+    while(atomic_mb_read(&mis->in_recovery) == true) {
+        fprintf(stderr, "Recover, not letting it fail %p\n", mis->from_src_file);
+        qemu_cond_wait(&migration_recovery_cond, &migration_recovery_mutex);
+    }
+    qemu_mutex_unlock(&migration_recovery_mutex);
+
+    if(mis->from_src_file != NULL) {
+        *f = mis->from_src_file;
+
+        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVERY,
+                                       MIGRATION_STATUS_ACTIVE);
+        return 0;
+    }
+
+    return -1;
+}
+
+
 PostcopyState  postcopy_state_get(void)
 {
     return atomic_mb_read(&incoming_postcopy_state);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 9b04778..d19c13a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -393,6 +393,8 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
  */
 static void *postcopy_ram_fault_thread(void *opaque)
 {
+    fprintf(stderr, "return path thread started\n");
+
     MigrationIncomingState *mis = opaque;
     struct uffd_msg msg;
     int ret;
@@ -481,8 +483,15 @@ static void *postcopy_ram_fault_thread(void *opaque)
             migrate_send_rp_req_pages(mis, NULL,
                                      rb_offset, hostpagesize);
         }
+
+        ret = qemu_file_get_error(mis->to_src_file);
+        if (ret != 0) {
+            qemu_file_clear_error(mis->to_src_file);
+            break;
+        }
     }
     trace_postcopy_ram_fault_thread_exit();
+    fprintf(stderr, "return path failed\n");
     return NULL;
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index 33a2911..79f601c 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1829,6 +1829,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
 {
     uint8_t section_type;
     int ret;
+    PostcopyState ps;
 
     while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
 
@@ -1837,28 +1838,46 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
         case QEMU_VM_SECTION_START:
         case QEMU_VM_SECTION_FULL:
             ret = qemu_loadvm_section_start_full(f, mis);
-            if (ret < 0) {
-                return ret;
-            }
             break;
         case QEMU_VM_SECTION_PART:
         case QEMU_VM_SECTION_END:
             ret = qemu_loadvm_section_part_end(f, mis);
-            if (ret < 0) {
-                return ret;
-            }
             break;
         case QEMU_VM_COMMAND:
             ret = loadvm_process_command(f);
             trace_qemu_loadvm_state_section_command(ret);
-            if ((ret < 0) || (ret & LOADVM_QUIT)) {
+            if (ret & LOADVM_QUIT) {
+                fprintf(stderr, "LOADVM_QUIT\n");
                 return ret;
-            }
+             }
             break;
         default:
             error_report("Unknown savevm section type %d", section_type);
             return -EINVAL;
         }
+
+        if (ret < 0) {
+            ps = postcopy_state_get();
+            ret = qemu_file_get_error(f);
+
+            /*  This check is based on how the error is set during the network
+             *  recv(). When recv() returns 0 (i.e. no data to read), the error
+             *  is set to -EIO. For all other network errors, it is set
+             *  according to the return value received.
+             */
+            if (ret == -EIO && ps == POSTCOPY_INCOMING_RUNNING) {
+                ret = qemu_migrate_postcopy_incoming_recovery(&f, mis);
+
+                if (ret == 0) {
+                    postcopy_ram_enable_notify(mis);
+                    qemu_file_clear_error(f);
+                    continue;
+                }
+            }
+
+            ret = qemu_file_get_error(f);
+            return ret;
+        }
     }
 
     return 0;
diff --git a/qapi-schema.json b/qapi-schema.json
index a658462..6a4c23b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2159,7 +2159,7 @@
 # compatible with -incoming and the format of the uri is already exposed
 # above libvirt
 ##
-{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } }
+{ 'command': 'migrate-incoming', 'data': {'uri': 'str', '*recover': 'bool' } }
 
 # @xen-save-devices-state:
 #
diff --git a/qemu-version.h b/qemu-version.h
new file mode 100644
index 0000000..9ce32a4
--- /dev/null
+++ b/qemu-version.h
@@ -0,0 +1 @@
+#define QEMU_PKGVERSION " (v2.6.0-1776-g689a31f-dirty)"
diff --git a/qmp-commands.hx b/qmp-commands.hx
index dd727bf..4234bc9 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -694,7 +694,7 @@ EQMP
 
     {
         .name       = "migrate-incoming",
-        .args_type  = "uri:s",
+        .args_type  = "recover:-r,uri:s",
         .mhandler.cmd_new = qmp_marshal_migrate_incoming,
     },
 
@@ -703,10 +703,12 @@ migrate-incoming
 ----------------
 
 Continue an incoming migration
+ -r to recover from a broken migration
 
 Arguments:
 
 - "uri": Source/listening URI (json-string)
+- "recover": recover migration (json-bool, optional)
 
 Example:
 
-- 
2.7.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]