qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PULL 35/57] migration_completion: Take current state


From: Juan Quintela
Subject: [Qemu-devel] [PULL 35/57] migration_completion: Take current state
Date: Tue, 10 Nov 2015 15:25:24 +0100

From: "Dr. David Alan Gilbert" <address@hidden>

Soon we'll be in either ACTIVE or POSTCOPY_ACTIVE when we
complete migration, and we need to know which we expect to be
in to change state safely.

Signed-off-by: Dr. David Alan Gilbert <address@hidden>
Reviewed-by: Juan Quintela <address@hidden>
Signed-off-by: Juan Quintela <address@hidden>
---
 include/migration/migration.h    |   5 ++
 include/migration/postcopy-ram.h |  35 ++++++++
 include/qemu/typedefs.h          |   1 +
 migration/migration.c            |   1 +
 migration/postcopy-ram.c         | 126 +++++++++++++++++++++++++++
 migration/ram.c                  | 181 ++++++++++++++++++++++++++++++++++++++-
 migration/savevm.c               |   2 -
 trace-events                     |   6 ++
 8 files changed, 354 insertions(+), 3 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 219032d..b382d77 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -199,6 +199,11 @@ double xbzrle_mig_cache_miss_rate(void);

 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
 void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
+/* For outgoing discard bitmap */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms);
+/* For incoming postcopy discard */
+int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
+                      uint64_t start, size_t length);

 /**
  * @migrate_add_blocker - prevent migration from proceeding
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index d81934f..de79fa7 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -16,4 +16,39 @@
 /* Return true if the host supports everything we need to do postcopy-ram */
 bool postcopy_ram_supported_by_host(void);

+/*
+ * Discard the contents of 'length' bytes from 'start'.
+ * Assumes postcopy_ram_supported_by_host() returned true before any call.
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               size_t length);
+
+
+/*
+ * Called at the start of each RAMBlock by the bitmap code.
+ * 'offset' is the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * Returns a new PDS
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+                                                 unsigned long offset,
+                                                 const char *name);
+
+/*
+ * Called by the bitmap code for each chunk to discard.
+ * May send a discard message, may just leave it queued to
+ * be sent later.
+ * @start,@length: a range of pages in the migration bitmap in the
+ *  RAM block passed to postcopy_discard_send_init() (length=1 is one page)
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+                                 unsigned long start, unsigned long length);
+
+/*
+ * Called at the end of each RAMBlock by the bitmap code.
+ * Sends any outstanding discard messages, frees the PDS.
+ */
+void postcopy_discard_send_finish(MigrationState *ms,
+                                  PostcopyDiscardState *pds);
+
 #endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index f7e0ed0..6b1093d 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -67,6 +67,7 @@ typedef struct PCMachineState PCMachineState;
 typedef struct PCMachineClass PCMachineClass;
 typedef struct PCMCIACardState PCMCIACardState;
 typedef struct PixelFormat PixelFormat;
+typedef struct PostcopyDiscardState PostcopyDiscardState;
 typedef struct PropertyInfo PropertyInfo;
 typedef struct Property Property;
 typedef struct QEMUBH QEMUBH;
diff --git a/migration/migration.c b/migration/migration.c
index 2acd0e6..afc863a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -24,6 +24,7 @@
 #include "qemu/sockets.h"
 #include "qemu/rcu.h"
 #include "migration/block.h"
+#include "migration/postcopy-ram.h"
 #include "qemu/thread.h"
 #include "qmp-commands.h"
 #include "trace.h"
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index cdd0168..261feda 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -27,6 +27,24 @@
 #include "qemu/error-report.h"
 #include "trace.h"

+/* Arbitrary limit on the number of ranges in each discard command;
+ * keeps each command to around 200 bytes
+ */
+#define MAX_DISCARDS_PER_COMMAND 12
+
+struct PostcopyDiscardState {
+    const char *ramblock_name; /* Name given to postcopy_discard_send_init */
+    uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
+    uint16_t cur_entry; /* Next free slot in start_list/length_list */
+    /*
+     * Start and length of each queued discard range (bytes)
+     */
+    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
+    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
+    unsigned int nsentwords; /* Ranges queued so far (one per send_range) */
+    unsigned int nsentcmds;  /* Discard commands sent so far */
+};
+
 /* Postcopy needs to detect accesses to pages that haven't yet been copied
  * across, and efficiently map new pages in, the techniques for doing this
  * are target OS specific.
@@ -145,6 +163,27 @@ out:
     return ret;
 }

+/**
+ * postcopy_ram_discard_range: Discard a range of memory.
+ * Assumes postcopy_ram_supported_by_host() returned true before any call.
+ *
+ * @mis: Current incoming migration state (not used here).
+ * @start, @length: host address range passed to madvise(MADV_DONTNEED).
+ *
+ * returns: 0 on success, -1 (after error_report) if madvise() fails.
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               size_t length)
+{
+    trace_postcopy_ram_discard_range(start, length);
+    if (madvise(start, length, MADV_DONTNEED)) {
+        error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
 #else
 /* No target OS support, stubs just fail */
 bool postcopy_ram_supported_by_host(void)
@@ -153,5 +192,92 @@ bool postcopy_ram_supported_by_host(void)
     return false;
 }

+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               size_t length)
+{
+    assert(0); /* NOTE(review): returns nothing if NDEBUG removes this */
+}
 #endif

+/* ------------------------------------------------------------------------- */
+
+/**
+ * postcopy_discard_send_init: Called at the start of each RAMBlock before
+ *   asking to discard individual ranges.
+ *
+ * @ms: The current migration state (not used here).
+ * @offset: the bitmap offset of the named RAMBlock in the migration
+ *   bitmap.
+ * @name: RAMBlock that discards will operate on (pointer kept, not copied).
+ *
+ * returns: a new zeroed PDS, freed by postcopy_discard_send_finish().
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+                                                 unsigned long offset,
+                                                 const char *name)
+{
+    PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
+
+    if (res) { /* NOTE(review): g_malloc0() should never return NULL here */
+        res->ramblock_name = name;
+        res->offset = offset;
+    }
+
+    return res;
+}
+
+/**
+ * postcopy_discard_send_range: Called by the bitmap code for each chunk to
+ *   discard. Queues the range and sends a discard command once
+ *   MAX_DISCARDS_PER_COMMAND ranges are queued; otherwise leaves it queued.
+ *
+ * @ms: Current migration state.
+ * @pds: Structure initialised by postcopy_discard_send_init().
+ * @start,@length: a range of pages in the migration bitmap in the
+ *   RAM block passed to postcopy_discard_send_init() (length=1 is one page)
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+                                 unsigned long start, unsigned long length)
+{
+    size_t tp_bits = qemu_target_page_bits();
+    /* Byte offset/length within the RAM block; shifts are done in 64 bits */
+    pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
+    pds->length_list[pds->cur_entry] = (uint64_t)length << tp_bits;
+    trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
+    pds->cur_entry++;
+    pds->nsentwords++;
+
+    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
+        /* Full set, ship it! */
+        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
+                                              pds->cur_entry,
+                                              pds->start_list,
+                                              pds->length_list);
+        pds->nsentcmds++;
+        pds->cur_entry = 0;
+    }
+}
+
+/**
+ * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
+ * bitmap code. Sends any outstanding discard messages, frees the PDS.
+ *
+ * @ms: Current migration state.
+ * @pds: Structure initialised by postcopy_discard_send_init().
+ */
+void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
+{
+    /* Anything unsent? */
+    if (pds->cur_entry) {
+        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->ramblock_name,
+                                              pds->cur_entry,
+                                              pds->start_list,
+                                              pds->length_list);
+        pds->nsentcmds++;
+    }
+
+    trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
+                                       pds->nsentcmds);
+
+    g_free(pds);
+}
diff --git a/migration/ram.c b/migration/ram.c
index c703176..b481d55 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -32,6 +32,7 @@
 #include "qemu/timer.h"
 #include "qemu/main-loop.h"
 #include "migration/migration.h"
+#include "migration/postcopy-ram.h"
 #include "exec/address-spaces.h"
 #include "migration/page_cache.h"
 #include "qemu/error-report.h"
@@ -967,6 +968,8 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock *block,
  * @f: Current migration stream.
  * @pss: Data about the state of the current dirty page scan.
  * @*again: Set to false if the search has scanned the whole of RAM
+ * @*ram_addr_abs: Pointer into which to store the address of the dirty page
+ *                 within the global ram_addr space
  */
 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
                              bool *again, ram_addr_t *ram_addr_abs)
@@ -1064,7 +1067,6 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage,
                 if (unsentmap) {
                     clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
                 }
-
             }
         }
     } while (!pages && again);
@@ -1233,6 +1235,183 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
     }
 }

+/* **** functions for postcopy ***** */
+
+/*
+ * Callback from postcopy_each_ram_send_discard for each RAMBlock; returns 0
+ * Note: At this point the 'unsentmap' is the processed bitmap combined
+ *       with the dirtymap; so a '1' means it's either dirty or unsent.
+ * start,length: Indexes into the bitmap for the first bit
+ *            representing the named block and length in target-pages
+ */
+static int postcopy_send_discard_bm_ram(MigrationState *ms,
+                                        PostcopyDiscardState *pds,
+                                        unsigned long start,
+                                        unsigned long length)
+{
+    unsigned long end = start + length; /* one after the end */
+    unsigned long current;
+    unsigned long *unsentmap;
+
+    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
+    for (current = start; current < end; ) {
+        unsigned long one = find_next_bit(unsentmap, end, current);
+
+        if (one < end) { /* 'end' itself means no set bit was found */
+            unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
+            unsigned long discard_length;
+
+            if (zero >= end) {
+                discard_length = end - one;
+            } else {
+                discard_length = zero - one;
+            }
+            postcopy_discard_send_range(ms, pds, one, discard_length);
+            current = one + discard_length;
+        } else {
+            current = one;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Utility for the outgoing postcopy code.
+ *   Calls postcopy_send_discard_bm_ram for each RAMBlock,
+ *   passing it bitmap indexes and the block's PostcopyDiscardState.
+ * Returns: 0 on success, else the first non-zero callback result
+ * (walks ram_list directly: qemu_ram_foreach_block would pass unscaled
+ *  lengths, forcing the postcopy code to deal with target page size)
+ */
+static int postcopy_each_ram_send_discard(MigrationState *ms)
+{
+    struct RAMBlock *block;
+    int ret;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        unsigned long first = block->offset >> TARGET_PAGE_BITS;
+        PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
+                                                               first,
+                                                               block->idstr);
+
+        /*
+         * Postcopy sends chunks of bitmap over the wire, but it
+         * just needs indexes at this point; this avoids it having
+         * target page specific code.
+         */
+        ret = postcopy_send_discard_bm_ram(ms, pds, first,
+                                    block->used_length >> TARGET_PAGE_BITS);
+        postcopy_discard_send_finish(ms, pds);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Transmit the set of pages to be discarded after precopy to the target;
+ * these are pages that:
+ *     a) Have been previously transmitted but are now dirty again
+ *     b) Have never been transmitted; this ensures that any destination
+ *        pages mapped by background tasks get discarded (transparent huge
+ *        pages is the specific concern).  Hopefully this is pretty sparse.
+ * Returns -EINVAL if there is no unsentmap, else the per-block send result.
+ */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms)
+{
+    int ret;
+    unsigned long *bitmap, *unsentmap;
+
+    rcu_read_lock();
+
+    /* This should be our last sync, the src is now paused */
+    migration_bitmap_sync();
+
+    unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
+    if (!unsentmap) {
+        /* We don't have a safe way to resize the unsentmap, so
+         * if the bitmap was resized it will be NULL at this
+         * point.
+         */
+        error_report("migration ram resized during precopy phase");
+        rcu_read_unlock();
+        return -EINVAL;
+    }
+
+    /*
+     * Update the unsentmap to be unsentmap = unsentmap | dirty
+     */
+    bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
+    bitmap_or(unsentmap, unsentmap, bitmap,
+               last_ram_offset() >> TARGET_PAGE_BITS);
+
+
+    trace_ram_postcopy_send_discard_bitmap();
+#ifdef DEBUG_POSTCOPY
+    ram_debug_dump_bitmap(unsentmap, true);
+#endif
+
+    ret = postcopy_each_ram_send_discard(ms);
+    rcu_read_unlock();
+
+    return ret;
+}
+
+/*
+ * At the start of the postcopy phase of migration, any now-dirty
+ * precopied pages are discarded.
+ *
+ * start, length describe a byte address range within the RAMBlock; it
+ * must be host-page aligned and lie within the block's used_length.
+ * Returns 0 on success, non-zero on failure.
+ */
+int ram_discard_range(MigrationIncomingState *mis,
+                      const char *block_name,
+                      uint64_t start, size_t length)
+{
+    int ret = -1;
+
+    rcu_read_lock();
+    RAMBlock *rb = qemu_ram_block_by_name(block_name);
+
+    if (!rb) {
+        error_report("ram_discard_range: Failed to find block '%s'",
+                     block_name);
+        goto err;
+    }
+
+    uint8_t *host_startaddr = rb->host + start;
+
+    if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
+        error_report("ram_discard_range: Unaligned start address: %p",
+                     host_startaddr);
+        goto err;
+    }
+    /* Written so that start + length cannot wrap around */
+    if (start <= rb->used_length && length <= rb->used_length - start) {
+        uint8_t *host_endaddr = host_startaddr + length;
+        if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
+            error_report("ram_discard_range: Unaligned end address: %p",
+                         host_endaddr);
+            goto err;
+        }
+        ret = postcopy_ram_discard_range(mis, host_startaddr, length);
+    } else {
+        error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
+                     "/%zu/%" PRIu64 ")",
+                     block_name, start, length, (uint64_t)rb->used_length);
+    }
+
+err:
+    rcu_read_unlock();
+
+    return ret;
+}
+
+
 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
  * long-running RCU critical section.  When rcu-reclaims in the code
  * start to become numerous it will be necessary to reduce the
diff --git a/migration/savevm.c b/migration/savevm.c
index 8e11877..8f07abd 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1349,7 +1349,6 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
     }
     trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
     while (len) {
-        /* TODO - ram_discard_range gets added in a later patch
         uint64_t start_addr, block_length;
         start_addr = qemu_get_be64(mis->from_src_file);
         block_length = qemu_get_be64(mis->from_src_file);
@@ -1360,7 +1359,6 @@ static int loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
         if (ret) {
             return ret;
         }
-        */
     }
     trace_loadvm_postcopy_ram_handle_discard_end();

diff --git a/trace-events b/trace-events
index a51ce22..e74675e 100644
--- a/trace-events
+++ b/trace-events
@@ -1255,6 +1255,7 @@ qemu_file_fclose(void) ""
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
 migration_throttle(void) ""
+ram_postcopy_send_discard_bitmap(void) ""

 # hw/display/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
@@ -1532,6 +1533,11 @@ rdma_start_incoming_migration_after_rdma_listen(void) ""
 rdma_start_outgoing_migration_after_rdma_connect(void) ""
 rdma_start_outgoing_migration_after_rdma_source_init(void) ""

+# migration/postcopy-ram.c
+postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands"
+postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx"
+postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
+
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
 kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-- 
2.5.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]