[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 3/6] Migration: Reconnect network in case of network failure during pc migration (destination)
From: |
Md Haris Iqbal |
Subject: |
[Qemu-devel] [PATCH 3/6] Migration: Reconnect network in case of network failure during pc migration (destination) |
Date: |
Mon, 22 Aug 2016 02:28:49 +0530 |
Signed-off-by: Md Haris Iqbal <address@hidden>
---
hmp-commands.hx | 14 ++++---
hmp.c | 3 +-
include/migration/migration.h | 3 ++
migration/migration.c | 97 ++++++++++++++++++++++++++++++++++++++++---
migration/postcopy-ram.c | 9 ++++
migration/savevm.c | 35 ++++++++++++----
qapi-schema.json | 2 +-
qemu-version.h | 1 +
qmp-commands.hx | 4 +-
9 files changed, 145 insertions(+), 23 deletions(-)
create mode 100644 qemu-version.h
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 8f765fd..e468c53 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -932,17 +932,19 @@ ETEXI
{
.name = "migrate_incoming",
- .args_type = "uri:s",
- .params = "uri",
- .help = "Continue an incoming migration from an -incoming defer",
+ .args_type = "recover:-r,uri:s",
+ .params = "[-r] uri",
+ .help = "Continue an incoming migration from an -incoming defer"
+ "\n\t\t\t -r to recover from a broken migration",
.mhandler.cmd = hmp_migrate_incoming,
},
STEXI
-@item migrate_incoming @var{uri}
+@item migrate_incoming [-r] @var{uri}
@findex migrate_incoming
-Continue an incoming migration using the @var{uri} (that has the same syntax
-as the -incoming option).
+Continue an incoming migration using the @var{uri} (that has the same syntax
+as the -incoming option).
+Use -r to recover from a broken migration.
ETEXI
diff --git a/hmp.c b/hmp.c
index 02ed457..965e4f3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1186,9 +1186,10 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
+ bool recover = qdict_get_try_bool(qdict, "recover", false);
const char *uri = qdict_get_str(qdict, "uri");
- qmp_migrate_incoming(uri, &err);
+ qmp_migrate_incoming(uri, !!recover, recover, &err);
hmp_handle_error(mon, &err);
}
diff --git a/include/migration/migration.h b/include/migration/migration.h
index bcaf55d..74d456e 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -82,6 +82,9 @@ typedef enum {
struct MigrationIncomingState {
QEMUFile *from_src_file;
+ /* To be used by a VM for recovery */
+ bool in_recovery;
+
/*
* Free at the start of the main state load, set as the main thread finishes
* loading state.
diff --git a/migration/migration.c b/migration/migration.c
index 149cf1e..166f4f7 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -432,6 +432,8 @@ void migration_fd_process_incoming(QEMUFile *f)
void migration_channel_process_incoming(MigrationState *s,
QIOChannel *ioc)
{
+ MigrationIncomingState *mis = migration_incoming_get_current();
+
trace_migration_set_incoming_channel(
ioc, object_get_typename(OBJECT(ioc)));
@@ -445,6 +447,19 @@ void migration_channel_process_incoming(MigrationState *s,
}
} else {
QEMUFile *f = qemu_fopen_channel_input(ioc);
+
+ if (mis != NULL && atomic_mb_read(&mis->in_recovery)) {
+ mis->from_src_file = f;
+
+ qemu_mutex_lock(&migration_recovery_mutex);
+ atomic_mb_set(&mis->in_recovery, false);
+ qemu_cond_signal(&migration_recovery_cond);
+ qemu_mutex_unlock(&migration_recovery_mutex);
+
+ fprintf(stderr, "recovered\n");
+ return;
+ }
+
migration_fd_process_incoming(f);
}
}
@@ -1063,19 +1078,62 @@ void migrate_del_blocker(Error *reason)
migration_blockers = g_slist_remove(migration_blockers, reason);
}
-void qmp_migrate_incoming(const char *uri, Error **errp)
+void qmp_migrate_incoming(const char *uri, bool in_recover, bool recover,
Error **errp)
{
Error *local_err = NULL;
+ bool recovery = in_recover && recover;
static bool once = true;
+ MigrationIncomingState *mis = migration_incoming_get_current();
- if (!deferred_incoming) {
- error_setg(errp, "For use with '-incoming defer'");
- return;
- }
- if (!once) {
+ if (recovery) {
+ if (mis != NULL) {
+
+ if(!atomic_mb_read(&mis->in_recovery)) {
+ /* Recovery option was set but the VM
+ * Does not seem to have been in recovery
+ */
+ error_setg(errp, "No VM to recover");
+ return;
+ } else {
+ /* Recovery option was set and the VM
+ * needs a recovery, resetting the socket
+ * to NULL
+ */
+ mis->from_src_file = NULL;
+ if(mis->have_fault_thread) {
+ /* shutdown the socket to source, causing the fault_thread to shutdown */
+ uint64_t tmp64 = 1;
+
+ fprintf(stderr, "rp shutdown\n");
+
+ if (write(mis->userfault_quit_fd, &tmp64, 8) != 8) {
+ error_report("%s: incrementing userfault_quit_fd: %s",
+ __func__, strerror(errno));
+ }
+ close(mis->userfault_quit_fd);
+ close(mis->userfault_fd);
+ mis->have_fault_thread = false;
+ }
+ fprintf(stderr, "rp after shutdown %p\n", mis->to_src_file);
+ }
+
+ } else {
+ /* Recovery option was set but there
+ * is no VM running/(in recovery)
+ */
+ error_setg(errp, "Cannot use -r option without a VM to recover");
+ return;
+ }
+ } else if (!once) {
error_setg(errp, "The incoming migration has already been started");
}
+ if (!recover && !deferred_incoming) {
+ error_setg(errp, "For use with '-incoming defer'");
+ return;
+ }
+
+
qemu_start_incoming_migration(uri, &local_err);
if (local_err) {
@@ -2007,6 +2065,33 @@ int qemu_migrate_postcopy_outgoing_recovery(MigrationState* ms)
}
+int qemu_migrate_postcopy_incoming_recovery(QEMUFile **f,
+ MigrationIncomingState* mis)
+{
+ migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
+ MIGRATION_STATUS_POSTCOPY_RECOVERY);
+
+ atomic_mb_set(&mis->in_recovery, true);
+ /* Code for network recovery to be added here */
+ qemu_mutex_lock(&migration_recovery_mutex);
+ while(atomic_mb_read(&mis->in_recovery) == true) {
+ fprintf(stderr, "Recover, not letting it fail %p\n",
mis->from_src_file);
+ qemu_cond_wait(&migration_recovery_cond, &migration_recovery_mutex);
+ }
+ qemu_mutex_unlock(&migration_recovery_mutex);
+
+ if(mis->from_src_file != NULL) {
+ *f = mis->from_src_file;
+
+ migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_RECOVERY,
+ MIGRATION_STATUS_ACTIVE);
+ return 0;
+ }
+
+ return -1;
+}
+
+
PostcopyState postcopy_state_get(void)
{
return atomic_mb_read(&incoming_postcopy_state);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 9b04778..d19c13a 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -393,6 +393,8 @@ static int ram_block_enable_notify(const char *block_name, void *host_addr,
*/
static void *postcopy_ram_fault_thread(void *opaque)
{
+ fprintf(stderr, "return path thread started\n");
+
MigrationIncomingState *mis = opaque;
struct uffd_msg msg;
int ret;
@@ -481,8 +483,15 @@ static void *postcopy_ram_fault_thread(void *opaque)
migrate_send_rp_req_pages(mis, NULL,
rb_offset, hostpagesize);
}
+
+ ret = qemu_file_get_error(mis->to_src_file);
+ if (ret != 0) {
+ qemu_file_clear_error(mis->to_src_file);
+ break;
+ }
}
trace_postcopy_ram_fault_thread_exit();
+ fprintf(stderr, "return path failed\n");
return NULL;
}
diff --git a/migration/savevm.c b/migration/savevm.c
index 33a2911..79f601c 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1829,6 +1829,7 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
{
uint8_t section_type;
int ret;
+ PostcopyState ps;
while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
@@ -1837,28 +1838,46 @@ static int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
case QEMU_VM_SECTION_START:
case QEMU_VM_SECTION_FULL:
ret = qemu_loadvm_section_start_full(f, mis);
- if (ret < 0) {
- return ret;
- }
break;
case QEMU_VM_SECTION_PART:
case QEMU_VM_SECTION_END:
ret = qemu_loadvm_section_part_end(f, mis);
- if (ret < 0) {
- return ret;
- }
break;
case QEMU_VM_COMMAND:
ret = loadvm_process_command(f);
trace_qemu_loadvm_state_section_command(ret);
- if ((ret < 0) || (ret & LOADVM_QUIT)) {
+ if (ret & LOADVM_QUIT) {
+ fprintf(stderr, "LOADVM_QUIT\n");
return ret;
- }
+ }
break;
default:
error_report("Unknown savevm section type %d", section_type);
return -EINVAL;
}
+
+ if (ret < 0) {
+ ps = postcopy_state_get();
+ ret = qemu_file_get_error(f);
+
+ /* This check is based on how the error is set during the network
+ * recv(). When recv() returns 0 (i.e. no data to read), the error
+ * is set to -EIO. For all other network errors, it is set
+ * according to the return value received.
+ */
+ if (ret == -EIO && ps == POSTCOPY_INCOMING_RUNNING) {
+ ret = qemu_migrate_postcopy_incoming_recovery(&f, mis);
+
+ if (ret == 0) {
+ postcopy_ram_enable_notify(mis);
+ qemu_file_clear_error(f);
+ continue;
+ }
+ }
+
+ ret = qemu_file_get_error(f);
+ return ret;
+ }
}
return 0;
diff --git a/qapi-schema.json b/qapi-schema.json
index a658462..6a4c23b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2159,7 +2159,7 @@
# compatible with -incoming and the format of the uri is already exposed
# above libvirt
##
-{ 'command': 'migrate-incoming', 'data': {'uri': 'str' } }
+{ 'command': 'migrate-incoming', 'data': {'uri': 'str', '*recover': 'bool' } }
# @xen-save-devices-state:
#
diff --git a/qemu-version.h b/qemu-version.h
new file mode 100644
index 0000000..9ce32a4
--- /dev/null
+++ b/qemu-version.h
@@ -0,0 +1 @@
+#define QEMU_PKGVERSION " (v2.6.0-1776-g689a31f-dirty)"
diff --git a/qmp-commands.hx b/qmp-commands.hx
index dd727bf..4234bc9 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -694,7 +694,7 @@ EQMP
{
.name = "migrate-incoming",
- .args_type = "uri:s",
+ .args_type = "recover:-r,uri:s",
.mhandler.cmd_new = qmp_marshal_migrate_incoming,
},
@@ -703,10 +703,12 @@ migrate-incoming
----------------
Continue an incoming migration
+ -r to recover from a broken migration
Arguments:
- "uri": Source/listening URI (json-string)
+- "recover": recover migration (json-bool, optional)
Example:
--
2.7.4
- [Qemu-devel] [PATCH 0/6] Recovery from network failure during Postcopy Migration, Md Haris Iqbal, 2016/08/21
- [Qemu-devel] [PATCH 2/6] migration : General additions for migration recovery, Md Haris Iqbal, 2016/08/21
- [Qemu-devel] [PATCH 3/6] Migration: Reconnect network in case of network failure during pc migration (destination),
Md Haris Iqbal <=
- [Qemu-devel] [PATCH 5/6] Migration: Recovering pages lost due to n/w failure during pc migration (source), Md Haris Iqbal, 2016/08/21
- [Qemu-devel] [PATCH 4/6] Migration: New bitmap for postcopy migration failure, Md Haris Iqbal, 2016/08/21
- [Qemu-devel] [PATCH 1/6] Migration: Reconnect network in case of network failure during pc migration (source), Md Haris Iqbal, 2016/08/21
- [Qemu-devel] [PATCH 6/6] Migration: Recovering pages lost due to n/w failure during pc migration (destination), Md Haris Iqbal, 2016/08/21
- Re: [Qemu-devel] [PATCH 0/6] Recovery from network failure during Postcopy Migration, no-reply, 2016/08/21