qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH COLO-Frame v9 21/32] COLO: implement default failove


From: zhanghailiang
Subject: [Qemu-devel] [PATCH COLO-Frame v9 21/32] COLO: implement default failover treatment
Date: Wed, 2 Sep 2015 16:23:08 +0800

If we detect some error in colo,  we will wait for some time,
hoping users also detect it. If users don't issue failover command.
We will go into default failover procedure, which the PVM will takeover
work while SVM is exit in default.

Signed-off-by: zhanghailiang <address@hidden>
Signed-off-by: Li Zhijian <address@hidden>
---
 migration/colo.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/migration/colo.c b/migration/colo.c
index 141b1c8..9ad285c 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -25,6 +25,14 @@
  */
 #define CHECKPOINT_MAX_PEROID 200
 
+/*
+ * The delay time before qemu begin the procedure of default failover 
treatment.
+ * Unit: ms
+ * Fix me: This value should be able to change by command
+ * 'migrate-set-parameters'
+ */
+#define DEFAULT_FAILOVER_DELAY 2000
+
 static QEMUBH *colo_bh;
 /* colo buffer */
 #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
@@ -277,6 +285,7 @@ static void *colo_thread(void *opaque)
     MigrationState *s = opaque;
     QEMUSizedBuffer *buffer = NULL;
     int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+    int64_t error_time;
     int fd, ret = 0;
 
     failover_init_state();
@@ -335,8 +344,25 @@ static void *colo_thread(void *opaque)
     }
 
 out:
+    current_time = error_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     if (ret < 0) {
         error_report("Detect some error: %s", strerror(-ret));
+        /* Give users time to get involved in this verdict */
+        while (current_time - error_time <= DEFAULT_FAILOVER_DELAY) {
+            if (failover_request_is_active()) {
+                error_report("Primary VM will take over work");
+                break;
+            }
+            usleep(100 * 1000);
+            current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+        }
+
+        qemu_mutex_lock_iothread();
+        if (!failover_request_is_active()) {
+            error_report("Primary VM will take over work in default");
+            failover_request_active(NULL);
+        }
+        qemu_mutex_unlock_iothread();
     }
 
     qsb_free(buffer);
@@ -414,6 +440,7 @@ void *colo_process_incoming_thread(void *opaque)
     QEMUFile *fb = NULL;
     QEMUSizedBuffer *buffer = NULL; /* Cache incoming device state */
     int  total_size;
+    int64_t error_time, current_time;
     int fd, ret = 0;
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
@@ -521,9 +548,28 @@ void *colo_process_incoming_thread(void *opaque)
     }
 
 out:
+    current_time = error_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     if (ret < 0) {
         error_report("colo incoming thread will exit, detect error: %s",
                      strerror(-ret));
+        /* Give users time to get involved in this verdict */
+        while (current_time - error_time <= DEFAULT_FAILOVER_DELAY) {
+            if (failover_request_is_active()) {
+                error_report("Secondary VM will take over work");
+                break;
+            }
+            usleep(100 * 1000);
+            current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+        }
+        /* check flag again*/
+        if (!failover_request_is_active()) {
+            /*
+            * We assume that Primary VM is still alive according to
+            * heartbeat, just kill Secondary VM
+            */
+            error_report("SVM is going to exit in default!");
+            exit(1);
+        }
     }
 
     if (fb) {
-- 
1.8.3.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]