qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH COLO-Frame v8 34/34] COLO: Add block replication into colo process


From: zhanghailiang
Subject: [Qemu-devel] [PATCH COLO-Frame v8 34/34] COLO: Add block replication into colo process
Date: Wed, 29 Jul 2015 16:45:44 +0800

Make sure the master starts block replication after the slave's block
replication has started.

Signed-off-by: zhanghailiang <address@hidden>
Signed-off-by: Wen Congyang <address@hidden>
Signed-off-by: Yang Hongyang <address@hidden>
Signed-off-by: Li Zhijian <address@hidden>
---
 migration/colo.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 trace-events     |  2 ++
 2 files changed, 70 insertions(+), 3 deletions(-)

diff --git a/migration/colo.c b/migration/colo.c
index 44b0f93..37d958c 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -20,6 +20,7 @@
 #include "qapi-event.h"
 #include "net/colo-nic.h"
 #include "qmp-commands.h"
+#include "block/block_int.h"
 
 /*
 * We should not do checkpoint one after another without any time interval,
@@ -134,6 +135,7 @@ static void secondary_vm_do_failover(void)
 {
     int old_state;
     MigrationIncomingState *mis = migration_incoming_get_current();
+    Error *local_err = NULL;
 
     /* Can not do failover during the process of VM's loading VMstate, Or
       * it will break the secondary VM.
@@ -156,6 +158,12 @@ static void secondary_vm_do_failover(void)
     }
     colo_proxy_destroy(COLO_MODE_SECONDARY);
 
+    bdrv_stop_replication_all(true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+    trace_colo_stop_block_replication("failover");
+
     if (!autostart) {
         error_report("\"-S\" qemu option will be ignored in secondary side");
         /* recover runstate to normal migration finish state */
@@ -184,6 +192,7 @@ static void primary_vm_do_failover(void)
 {
     MigrationState *s = migrate_get_current();
     int old_state;
+    Error *local_err = NULL;
 
     colo_proxy_destroy(COLO_MODE_PRIMARY);
 
@@ -201,6 +210,12 @@ static void primary_vm_do_failover(void)
     }
     qemu_bh_schedule(s->cleanup_bh);
 
+    bdrv_stop_replication_all(true, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+    trace_colo_stop_block_replication("failover");
+
     vm_start();
 
     old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
@@ -282,6 +297,7 @@ static int colo_do_checkpoint_transaction(MigrationState *s, QEMUFile *control)
     int colo_shutdown, ret;
     size_t size;
     QEMUFile *trans = NULL;
+    Error *local_err = NULL;
 
     ret = colo_ctl_put(s->file, COLO_CHECKPOINT_NEW);
     if (ret < 0) {
@@ -335,6 +351,16 @@ static int colo_do_checkpoint_transaction(MigrationState *s, QEMUFile *control)
         goto out;
     }
 
+    /* we call this api although this may do nothing on primary side */
+    qemu_mutex_lock_iothread();
+    bdrv_do_checkpoint_all(&local_err);
+    qemu_mutex_unlock_iothread();
+    if (local_err) {
+        error_report_err(local_err);
+        ret = -1;
+        goto out;
+    }
+
     ret = colo_ctl_put(s->file, COLO_CHECKPOINT_SEND);
     if (ret < 0) {
         goto out;
@@ -364,6 +390,10 @@ static int colo_do_checkpoint_transaction(MigrationState *s, QEMUFile *control)
     }
 
     if (colo_shutdown) {
+        qemu_mutex_lock_iothread();
+        bdrv_stop_replication_all(false, NULL);
+        trace_colo_stop_block_replication("shutdown");
+        qemu_mutex_unlock_iothread();
         colo_ctl_put(s->file, COLO_GUEST_SHUTDOWN);
         qemu_fflush(s->file);
         colo_shutdown_requested = 0;
@@ -393,6 +423,7 @@ static void *colo_thread(void *opaque)
     QEMUFile *colo_control = NULL;
     int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     int i, ret;
+    Error *local_err = NULL;
 
     failover_init_state();
     if (colo_proxy_init(COLO_MODE_PRIMARY) != 0) {
@@ -422,6 +453,12 @@ static void *colo_thread(void *opaque)
     }
 
     qemu_mutex_lock_iothread();
+    /* start block replication */
+    bdrv_start_replication_all(REPLICATION_MODE_PRIMARY, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    trace_colo_start_block_replication();
     vm_start();
     qemu_mutex_unlock_iothread();
     trace_colo_vm_state_change("stop", "run");
@@ -472,7 +509,12 @@ do_checkpoint:
     }
 
 out:
-    error_report("colo: some error happens in colo_thread");
+    if (local_err) {
+        error_report_err(local_err);
+    } else {
+        error_report("colo: some error happens in colo_thread");
+    }
+
     if (colo_control) {
         qemu_fclose(colo_control);
     }
@@ -544,6 +586,8 @@ static int colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request)
     case COLO_GUEST_SHUTDOWN:
         qemu_mutex_lock_iothread();
         vm_stop_force_state(RUN_STATE_COLO);
+        bdrv_stop_replication_all(false, NULL);
+        trace_colo_stop_block_replication("shutdown");
         qemu_system_shutdown_request_core();
         qemu_mutex_unlock_iothread();
         /* the main thread will exit and termiante the whole
@@ -563,6 +607,7 @@ void *colo_process_incoming_checkpoints(void *opaque)
     QEMUFile *ctl = NULL, *fb = NULL;
     uint64_t total_size;
     int i, ret;
+    Error *local_err = NULL;
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_COLO);
@@ -590,6 +635,15 @@ void *colo_process_incoming_checkpoints(void *opaque)
         goto out;
     }
 
+    qemu_mutex_lock_iothread();
+    /* start block replication */
+    bdrv_start_replication_all(REPLICATION_MODE_SECONDARY, &local_err);
+    if (local_err) {
+        goto out;
+    }
+    qemu_mutex_unlock_iothread();
+    trace_colo_start_block_replication();
+
     ret = colo_ctl_put(ctl, COLO_CHECPOINT_READY);
     if (ret < 0) {
         goto out;
@@ -674,8 +728,15 @@ void *colo_process_incoming_checkpoints(void *opaque)
             goto out;
         }
 
-        vmstate_loading = false;
+        /* discard colo disk buffer */
+        bdrv_do_checkpoint_all(&local_err);
         qemu_mutex_unlock_iothread();
+        if (local_err) {
+            vmstate_loading = false;
+            goto out;
+        }
+
+        vmstate_loading = false;
 
         if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
             failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE);
@@ -698,7 +759,11 @@ void *colo_process_incoming_checkpoints(void *opaque)
     }
 
 out:
-    error_report("Detect some error or get a failover request");
+    if (local_err) {
+        error_report_err(local_err);
+    } else {
+        error_report("Detect some error or get a failover request");
+    }
     /*
     * Here, we raise a qmp event to the user,
     * It can help user to know what happens, and help deciding whether to
diff --git a/trace-events b/trace-events
index 3e31809..03cd035 100644
--- a/trace-events
+++ b/trace-events
@@ -1478,6 +1478,8 @@ colo_ctl_put(const char *msg) "Send '%s'"
 colo_ctl_get(const char *msg) "Receive '%s'"
 colo_failover_set_state(int new_state) "new state %d"
 colo_rcv_pkt(int result) "Result of net packets comparing is different: %d"
+colo_start_block_replication(void) "Block replication is started"
+colo_stop_block_replication(const char *reason) "Block replication is stopped(reason: '%s')"
 
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-- 
1.8.3.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]