[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH COLO-Frame v8 34/34] COLO: Add block replication into colo process
From: |
zhanghailiang |
Subject: |
[Qemu-devel] [PATCH COLO-Frame v8 34/34] COLO: Add block replication into colo process |
Date: |
Wed, 29 Jul 2015 16:45:44 +0800 |
Make sure the master starts block replication only after the slave's
block replication has started.
Signed-off-by: zhanghailiang <address@hidden>
Signed-off-by: Wen Congyang <address@hidden>
Signed-off-by: Yang Hongyang <address@hidden>
Signed-off-by: Li Zhijian <address@hidden>
---
migration/colo.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
trace-events | 2 ++
2 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/migration/colo.c b/migration/colo.c
index 44b0f93..37d958c 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -20,6 +20,7 @@
#include "qapi-event.h"
#include "net/colo-nic.h"
#include "qmp-commands.h"
+#include "block/block_int.h"
/*
* We should not do checkpoint one after another without any time interval,
@@ -134,6 +135,7 @@ static void secondary_vm_do_failover(void)
{
int old_state;
MigrationIncomingState *mis = migration_incoming_get_current();
+ Error *local_err = NULL;
/* Can not do failover during the process of VM's loading VMstate, Or
* it will break the secondary VM.
@@ -156,6 +158,12 @@ static void secondary_vm_do_failover(void)
}
colo_proxy_destroy(COLO_MODE_SECONDARY);
+ bdrv_stop_replication_all(true, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ trace_colo_stop_block_replication("failover");
+
if (!autostart) {
error_report("\"-S\" qemu option will be ignored in secondary side");
/* recover runstate to normal migration finish state */
@@ -184,6 +192,7 @@ static void primary_vm_do_failover(void)
{
MigrationState *s = migrate_get_current();
int old_state;
+ Error *local_err = NULL;
colo_proxy_destroy(COLO_MODE_PRIMARY);
@@ -201,6 +210,12 @@ static void primary_vm_do_failover(void)
}
qemu_bh_schedule(s->cleanup_bh);
+ bdrv_stop_replication_all(true, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ trace_colo_stop_block_replication("failover");
+
vm_start();
old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
@@ -282,6 +297,7 @@ static int colo_do_checkpoint_transaction(MigrationState *s, QEMUFile *control)
int colo_shutdown, ret;
size_t size;
QEMUFile *trans = NULL;
+ Error *local_err = NULL;
ret = colo_ctl_put(s->file, COLO_CHECKPOINT_NEW);
if (ret < 0) {
@@ -335,6 +351,16 @@ static int colo_do_checkpoint_transaction(MigrationState *s, QEMUFile *control)
goto out;
}
+ /* we call this api although this may do nothing on primary side */
+ qemu_mutex_lock_iothread();
+ bdrv_do_checkpoint_all(&local_err);
+ qemu_mutex_unlock_iothread();
+ if (local_err) {
+ error_report_err(local_err);
+ ret = -1;
+ goto out;
+ }
+
ret = colo_ctl_put(s->file, COLO_CHECKPOINT_SEND);
if (ret < 0) {
goto out;
@@ -364,6 +390,10 @@ static int colo_do_checkpoint_transaction(MigrationState *s, QEMUFile *control)
}
if (colo_shutdown) {
+ qemu_mutex_lock_iothread();
+ bdrv_stop_replication_all(false, NULL);
+ trace_colo_stop_block_replication("shutdown");
+ qemu_mutex_unlock_iothread();
colo_ctl_put(s->file, COLO_GUEST_SHUTDOWN);
qemu_fflush(s->file);
colo_shutdown_requested = 0;
@@ -393,6 +423,7 @@ static void *colo_thread(void *opaque)
QEMUFile *colo_control = NULL;
int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
int i, ret;
+ Error *local_err = NULL;
failover_init_state();
if (colo_proxy_init(COLO_MODE_PRIMARY) != 0) {
@@ -422,6 +453,12 @@ static void *colo_thread(void *opaque)
}
qemu_mutex_lock_iothread();
+ /* start block replication */
+ bdrv_start_replication_all(REPLICATION_MODE_PRIMARY, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ trace_colo_start_block_replication();
vm_start();
qemu_mutex_unlock_iothread();
trace_colo_vm_state_change("stop", "run");
@@ -472,7 +509,12 @@ do_checkpoint:
}
out:
- error_report("colo: some error happens in colo_thread");
+ if (local_err) {
+ error_report_err(local_err);
+ } else {
+ error_report("colo: some error happens in colo_thread");
+ }
+
if (colo_control) {
qemu_fclose(colo_control);
}
@@ -544,6 +586,8 @@ static int colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request)
case COLO_GUEST_SHUTDOWN:
qemu_mutex_lock_iothread();
vm_stop_force_state(RUN_STATE_COLO);
+ bdrv_stop_replication_all(false, NULL);
+ trace_colo_stop_block_replication("shutdown");
qemu_system_shutdown_request_core();
qemu_mutex_unlock_iothread();
/* the main thread will exit and termiante the whole
@@ -563,6 +607,7 @@ void *colo_process_incoming_checkpoints(void *opaque)
QEMUFile *ctl = NULL, *fb = NULL;
uint64_t total_size;
int i, ret;
+ Error *local_err = NULL;
migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_COLO);
@@ -590,6 +635,15 @@ void *colo_process_incoming_checkpoints(void *opaque)
goto out;
}
+ qemu_mutex_lock_iothread();
+ /* start block replication */
+ bdrv_start_replication_all(REPLICATION_MODE_SECONDARY, &local_err);
+ if (local_err) {
+ goto out;
+ }
+ qemu_mutex_unlock_iothread();
+ trace_colo_start_block_replication();
+
ret = colo_ctl_put(ctl, COLO_CHECPOINT_READY);
if (ret < 0) {
goto out;
@@ -674,8 +728,15 @@ void *colo_process_incoming_checkpoints(void *opaque)
goto out;
}
- vmstate_loading = false;
+ /* discard colo disk buffer */
+ bdrv_do_checkpoint_all(&local_err);
qemu_mutex_unlock_iothread();
+ if (local_err) {
+ vmstate_loading = false;
+ goto out;
+ }
+
+ vmstate_loading = false;
if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE);
@@ -698,7 +759,11 @@ void *colo_process_incoming_checkpoints(void *opaque)
}
out:
- error_report("Detect some error or get a failover request");
+ if (local_err) {
+ error_report_err(local_err);
+ } else {
+ error_report("Detect some error or get a failover request");
+ }
/*
* Here, we raise a qmp event to the user,
* It can help user to know what happens, and help deciding whether to
diff --git a/trace-events b/trace-events
index 3e31809..03cd035 100644
--- a/trace-events
+++ b/trace-events
@@ -1478,6 +1478,8 @@ colo_ctl_put(const char *msg) "Send '%s'"
colo_ctl_get(const char *msg) "Receive '%s'"
colo_failover_set_state(int new_state) "new state %d"
colo_rcv_pkt(int result) "Result of net packets comparing is different: %d"
+colo_start_block_replication(void) "Block replication is started"
+colo_stop_block_replication(const char *reason) "Block replication is stopped(reason: '%s')"
# kvm-all.c
kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
--
1.8.3.1
- [Qemu-devel] [PATCH COLO-Frame v8 26/34] COLO NIC: Implement colo nic init/destroy function, (continued)
- [Qemu-devel] [PATCH COLO-Frame v8 26/34] COLO NIC: Implement colo nic init/destroy function, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 18/34] COLO failover: Implement COLO primary/secondary vm failover work, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 33/34] COLO: Implement shutdown checkpoint, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 29/34] COLO: Do checkpoint according to the result of packets comparation, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 28/34] COLO: Handle nfnetlink message from proxy module, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 24/34] COLO NIC: Implement colo nic device interface configure(), zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 32/34] COLO NIC: Implement NIC checkpoint and failover, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 31/34] COLO: Add colo-set-checkpoint-period command, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 27/34] COLO NIC: Some init work related with proxy module, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 21/34] COLO: Add new command parameter 'forward_nic' 'colo_script' for net, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 34/34] COLO: Add block replication into colo process,
zhanghailiang <=
- [Qemu-devel] [PATCH COLO-Frame v8 19/34] qmp event: Add event notification for COLO error, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 30/34] COLO: Improve checkpoint efficiency by do additional periodic checkpoint, zhanghailiang, 2015/07/29
- [Qemu-devel] [PATCH COLO-Frame v8 02/34] migration: Introduce capability 'colo' to migration, zhanghailiang, 2015/07/29