qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH v2 06/12] mc: introduce state machine change


From: Li Guang
Subject: Re: [Qemu-devel] [RFC PATCH v2 06/12] mc: introduce state machine changes for MC
Date: Wed, 26 Feb 2014 10:52:25 +0800
User-agent: Mozilla/5.0 (X11; U; Linux i686 (x86_64); en-US; rv:1.9.1.16) Gecko/20120421 Iceape/2.0.11

Li Guang wrote:
Michael R. Hines wrote:
On 02/19/2014 09:00 AM, Li Guang wrote:
Hi,

address@hidden wrote:
From: "Michael R. Hines"<address@hidden>

This patch sets up the initial changes to the migration state
machine and prototypes to be used by the checkpointing code
to interact with the state machine so that we can later handle
failure and recovery scenarios.

Signed-off-by: Michael R. Hines<address@hidden>
---
  arch_init.c                   | 29 ++++++++++++++++++++++++-----
  include/migration/migration.h |  2 ++
migration.c | 37 +++++++++++++++++++++----------------
  3 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index db75120..e9d4d9e 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -658,13 +658,13 @@ static void ram_migration_cancel(void *opaque)
      migration_end();
  }

-static void reset_ram_globals(void)
+static void reset_ram_globals(bool reset_bulk_stage)
  {
      last_seen_block = NULL;
      last_sent_block = NULL;
      last_offset = 0;
      last_version = ram_list.version;
-    ram_bulk_stage = true;
+    ram_bulk_stage = reset_bulk_stage;
  }


There is a chance that ram_save_block will never break out of its while loop
if last_seen_block is reset for MC when there are no dirty pages
to be migrated.

Thanks!


This bug is fixed now - you can re-pull from github.com.

    Believe it or not, when there is no network devices attached to the
    guest whatsoever, the initial bootup process can be extremely slow,
    where there are almost no processes dirtying memory at all or
    only occasionally except for maybe a DHCP client. This results in
    some 100ms periods of time where there are actually *no* dirty
    pages - hard to believe, but it does happen.

sorry, it seems all my pull requests to github were blocked,
let me check it later.

Thanks!


tested, works well for me.

Thanks!


    ram_save_block() really doesn't understand this possibility,
surprisingly. It results in an infinite loop because it was expecting
    last_seen_block to always be non-NULL, when in fact, we have reset
    the value to start from the beginning so that the guest can be scanned
    in its entirety for dirty memory.


  #define MAX_WAIT 50 /* ms, half buffered_file limit */
@@ -674,6 +674,15 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
      RAMBlock *block;
      int64_t ram_pages = last_ram_offset()>> TARGET_PAGE_BITS;

+    /*
+ * RAM stays open during micro-checkpointing for the next transaction.
+     */
+    if (migration_is_mc(migrate_get_current())) {
+        qemu_mutex_lock_ramlist();
+        reset_ram_globals(false);
+        goto skip_setup;
+    }
+
      migration_bitmap = bitmap_new(ram_pages);
      bitmap_set(migration_bitmap, 0, ram_pages);
      migration_dirty_pages = ram_pages;
@@ -710,12 +719,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
      qemu_mutex_lock_iothread();
      qemu_mutex_lock_ramlist();
      bytes_transferred = 0;
-    reset_ram_globals();
+    reset_ram_globals(true);

      memory_global_dirty_log_start();
      migration_bitmap_sync();
      qemu_mutex_unlock_iothread();

+skip_setup:
+
      qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);

      QTAILQ_FOREACH(block,&ram_list.blocks, next) {
@@ -744,7 +755,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
      qemu_mutex_lock_ramlist();

      if (ram_list.version != last_version) {
-        reset_ram_globals();
+        reset_ram_globals(true);
      }

      ram_control_before_iterate(f, RAM_CONTROL_ROUND);
@@ -825,7 +836,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
      }

      ram_control_after_iterate(f, RAM_CONTROL_FINISH);
-    migration_end();
+
+    /*
+     * Only cleanup at the end of normal migrations
+     * or if the MC destination failed and we got an error.
+ * Otherwise, we are (or will soon be) in MIG_STATE_CHECKPOINTING.
+     */
+ if(!migrate_use_mc() || migration_has_failed(migrate_get_current())) {
+        migration_end();
+    }

      qemu_mutex_unlock_ramlist();
      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
diff --git a/include/migration/migration.h b/include/migration/migration.h
index a7c54fe..e876a2c 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -101,7 +101,9 @@ int migrate_fd_close(MigrationState *s);

  void add_migration_state_change_notifier(Notifier *notify);
  void remove_migration_state_change_notifier(Notifier *notify);
+bool migration_is_active(MigrationState *);
  bool migration_in_setup(MigrationState *);
+bool migration_is_mc(MigrationState *s);
  bool migration_has_finished(MigrationState *);
  bool migration_has_failed(MigrationState *);
  MigrationState *migrate_get_current(void);
diff --git a/migration.c b/migration.c
index 25add6f..f42dae4 100644
--- a/migration.c
+++ b/migration.c
@@ -36,16 +36,6 @@
      do { } while (0)
  #endif

-enum {
-    MIG_STATE_ERROR = -1,
-    MIG_STATE_NONE,
-    MIG_STATE_SETUP,
-    MIG_STATE_CANCELLING,
-    MIG_STATE_CANCELLED,
-    MIG_STATE_ACTIVE,
-    MIG_STATE_COMPLETED,
-};
-
#define MAX_THROTTLE (32<< 20) /* Migration speed throttling */

  /* Amount of time to allocate to each "chunk" of bandwidth-throttled
@@ -273,7 +263,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
      MigrationState *s = migrate_get_current();
      MigrationCapabilityStatusList *cap;

- if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
+    if (migration_is_active(s)) {
          error_set(errp, QERR_MIGRATION_ACTIVE);
          return;
      }
@@ -285,7 +275,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,

  /* shared migration helpers */

-static void migrate_set_state(MigrationState *s, int old_state, int new_state)
+bool migration_is_active(MigrationState *s)
+{
+ return (s->state == MIG_STATE_ACTIVE) || s->state == MIG_STATE_SETUP
+            || s->state == MIG_STATE_CHECKPOINTING;
+}
+
+void migrate_set_state(MigrationState *s, int old_state, int new_state)
  {
if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
          trace_migrate_set_state(new_state);
@@ -309,7 +305,7 @@ static void migrate_fd_cleanup(void *opaque)
          s->file = NULL;
      }

-    assert(s->state != MIG_STATE_ACTIVE);
+    assert(!migration_is_active(s));

      if (s->state != MIG_STATE_COMPLETED) {
          qemu_savevm_state_cancel();
@@ -356,7 +352,12 @@ void remove_migration_state_change_notifier(Notifier *notify)

  bool migration_in_setup(MigrationState *s)
  {
-    return s->state == MIG_STATE_SETUP;
+        return s->state == MIG_STATE_SETUP;
+}
+
+bool migration_is_mc(MigrationState *s)
+{
+        return s->state == MIG_STATE_CHECKPOINTING;
  }

  bool migration_has_finished(MigrationState *s)
@@ -419,7 +420,8 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
      params.shared = has_inc&&  inc;

if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
-        s->state == MIG_STATE_CANCELLING) {
+        s->state == MIG_STATE_CANCELLING
+         || s->state == MIG_STATE_CHECKPOINTING) {
          error_set(errp, QERR_MIGRATION_ACTIVE);
          return;
      }
@@ -624,7 +626,10 @@ static void *migration_thread(void *opaque)
                  }

                  if (!qemu_file_get_error(s->file)) {
- migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
+                    if (!migrate_use_mc()) {
+                        migrate_set_state(s,
+                            MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
+                    }
                      break;
                  }
              }








reply via email to

[Prev in Thread] Current Thread [Next in Thread]