qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v8 32/54] Postcopy: Maintain sentmap and calculate d


From: Dr. David Alan Gilbert (git)
Subject: [Qemu-devel] [PATCH v8 32/54] Postcopy: Maintain sentmap and calculate discard
Date: Tue, 29 Sep 2015 09:37:56 +0100

From: "Dr. David Alan Gilbert" <address@hidden>

Where postcopy is preceeded by a period of precopy, the destination will
have received pages that may have been dirtied on the source after the
page was sent.  The destination must throw these pages away before
starting it's CPUs.

Maintain a 'sentmap' of pages that have already been sent.
Calculate list of sent & dirty pages
Provide helpers on the destination side to discard these.

Signed-off-by: Dr. David Alan Gilbert <address@hidden>
Reviewed-by: Amit Shah <address@hidden>
---
 include/migration/migration.h    |  12 +++
 include/migration/postcopy-ram.h |  35 +++++++
 include/qemu/typedefs.h          |   1 +
 migration/migration.c            |   1 +
 migration/postcopy-ram.c         | 129 +++++++++++++++++++++++
 migration/ram.c                  | 218 ++++++++++++++++++++++++++++++++++++++-
 migration/savevm.c               |   2 -
 trace-events                     |   5 +
 8 files changed, 396 insertions(+), 7 deletions(-)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index 219032d..4904d00 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -130,6 +130,13 @@ struct MigrationState
 
     /* Flag set once the migration has been asked to enter postcopy */
     bool start_postcopy;
+
+    /* bitmap of pages that have been sent at least once
+     * only maintained and used in postcopy at the moment
+     * where it's used to send the dirtymap at the start
+     * of the postcopy phase
+     */
+    unsigned long *sentmap;
 };
 
 void process_incoming_migration(QEMUFile *f);
@@ -199,6 +206,11 @@ double xbzrle_mig_cache_miss_rate(void);
 
 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
 void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
+/* For outgoing discard bitmap */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms);
+/* For incoming postcopy discard */
+int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
+                      uint64_t start, size_t length);
 
 /**
  * @migrate_add_blocker - prevent migration from proceeding
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index d81934f..80ed2d9 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -16,4 +16,39 @@
 /* Return true if the host supports everything we need to do postcopy-ram */
 bool postcopy_ram_supported_by_host(void);
 
+/*
+ * Discard the contents of 'length' bytes from 'start'
+ * We can assume that if we've been called postcopy_ram_hosttest returned true
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               size_t length);
+
+
+/*
+ * Called at the start of each RAMBlock by the bitmap code.
+ * 'offset' is the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * Returns a new PDS
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+                                                 unsigned long offset,
+                                                 const char *name);
+
+/*
+ * Called by the bitmap code for each chunk to discard.
+ * May send a discard message, may just leave it queued to
+ * be sent later.
+ * 'start' and 'end' describe an inclusive range of pages in the
+ * migration bitmap in the RAM block passed to postcopy_discard_send_init.
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+                                 unsigned long start, unsigned long end);
+
+/*
+ * Called at the end of each RAMBlock by the bitmap code.
+ * Sends any outstanding discard messages, frees the PDS.
+ */
+void postcopy_discard_send_finish(MigrationState *ms,
+                                  PostcopyDiscardState *pds);
+
 #endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 0bf7967..32332b8 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -63,6 +63,7 @@ typedef struct PCMachineState PCMachineState;
 typedef struct PCMachineClass PCMachineClass;
 typedef struct PCMCIACardState PCMCIACardState;
 typedef struct PixelFormat PixelFormat;
+typedef struct PostcopyDiscardState PostcopyDiscardState;
 typedef struct PropertyInfo PropertyInfo;
 typedef struct Property Property;
 typedef struct QEMUBH QEMUBH;
diff --git a/migration/migration.c b/migration/migration.c
index 2ae5909..b57a0e6 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -24,6 +24,7 @@
 #include "qemu/sockets.h"
 #include "qemu/rcu.h"
 #include "migration/block.h"
+#include "migration/postcopy-ram.h"
 #include "qemu/thread.h"
 #include "qmp-commands.h"
 #include "trace.h"
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index cdd0168..10c9cab 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -27,6 +27,24 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 
+/* Arbitrary limit on size of each discard command,
+ * keeps them around ~200 bytes
+ */
+#define MAX_DISCARDS_PER_COMMAND 12
+
+struct PostcopyDiscardState {
+    const char *name;
+    uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
+    uint16_t cur_entry;
+    /*
+     * Start and length of a discard range (bytes)
+     */
+    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
+    uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
+    unsigned int nsentwords;
+    unsigned int nsentcmds;
+};
+
 /* Postcopy needs to detect accesses to pages that haven't yet been copied
  * across, and efficiently map new pages in, the techniques for doing this
  * are target OS specific.
@@ -145,6 +163,27 @@ out:
     return ret;
 }
 
+/**
+ * postcopy_ram_discard_range: Discard a range of memory.
+ * We can assume that if we've been called postcopy_ram_hosttest returned true.
+ *
+ * @mis: Current incoming migration state.
+ * @start, @length: range of memory to discard.
+ *
+ * returns: 0 on success.
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               size_t length)
+{
+    trace_postcopy_ram_discard_range(start, length);
+    if (madvise(start, length, MADV_DONTNEED)) {
+        error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
 #else
 /* No target OS support, stubs just fail */
 bool postcopy_ram_supported_by_host(void)
@@ -153,5 +192,95 @@ bool postcopy_ram_supported_by_host(void)
     return false;
 }
 
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               size_t length)
+{
+    assert(0);
+}
 #endif
 
+/* ------------------------------------------------------------------------- */
+
+/**
+ * postcopy_discard_send_init: Called at the start of each RAMBlock before
+ *   asking to discard individual ranges.
+ *
+ * @ms: The current migration state.
+ * @offset: the bitmap offset of the named RAMBlock in the migration
+ *   bitmap.
+ * @name: RAMBlock that discards will operate on.
+ *
+ * returns: a new PDS.
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+                                                 unsigned long offset,
+                                                 const char *name)
+{
+    PostcopyDiscardState *res = g_try_malloc(sizeof(PostcopyDiscardState));
+
+    if (res) {
+        res->name = name;
+        res->cur_entry = 0;
+        res->nsentwords = 0;
+        res->nsentcmds = 0;
+        res->offset = offset;
+    }
+
+    return res;
+}
+
+/**
+ * postcopy_discard_send_range: Called by the bitmap code for each chunk to
+ *   discard. May send a discard message, may just leave it queued to
+ *   be sent later.
+ *
+ * @ms: Current migration state.
+ * @pds: Structure initialised by postcopy_discard_send_init().
+ * @start,@end: an inclusive range of pages in the migration bitmap in the
+ *   RAM block passed to postcopy_discard_send_init().
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+                                unsigned long start, unsigned long end)
+{
+    size_t tp_bits = qemu_target_page_bits();
+    /* Convert to byte offsets within the RAM block */
+    pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
+    pds->length_list[pds->cur_entry] = ((1 + end - pds->offset) << tp_bits) -
+                                       pds->start_list[pds->cur_entry];
+    pds->cur_entry++;
+    pds->nsentwords++;
+
+    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
+        /* Full set, ship it! */
+        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->name,
+                                              pds->cur_entry,
+                                              pds->start_list,
+                                              pds->length_list);
+        pds->nsentcmds++;
+        pds->cur_entry = 0;
+    }
+}
+
+/**
+ * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
+ * bitmap code. Sends any outstanding discard messages, frees the PDS
+ *
+ * @ms: Current migration state.
+ * @pds: Structure initialised by postcopy_discard_send_init().
+ */
+void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState 
*pds)
+{
+    /* Anything unsent? */
+    if (pds->cur_entry) {
+        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->name,
+                                              pds->cur_entry,
+                                              pds->start_list,
+                                              pds->length_list);
+        pds->nsentcmds++;
+    }
+
+    trace_postcopy_discard_send_finish(pds->name, pds->nsentwords,
+                                       pds->nsentcmds);
+
+    g_free(pds);
+}
diff --git a/migration/ram.c b/migration/ram.c
index 8644675..e1c6c4a 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -32,6 +32,7 @@
 #include "qemu/timer.h"
 #include "qemu/main-loop.h"
 #include "migration/migration.h"
+#include "migration/postcopy-ram.h"
 #include "exec/address-spaces.h"
 #include "migration/page_cache.h"
 #include "qemu/error-report.h"
@@ -506,10 +507,18 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t 
**current_data,
     return 1;
 }
 
-/* Called with rcu_read_lock() to protect migration_bitmap */
+/* Called with rcu_read_lock() to protect migration_bitmap
+ * mr: The region to search for dirty pages in
+ * start: Start address (typically so we can continue from previous page)
+ * ram_addr_abs: Pointer into which to store the address of the dirty page
+ *               within the global ram_addr space
+ *
+ * Returns: byte offset within memory region of the start of a dirty page
+ */
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock *rb,
-                                                 ram_addr_t start)
+                                                 ram_addr_t start,
+                                                 ram_addr_t *ram_addr_abs)
 {
     unsigned long base = rb->offset >> TARGET_PAGE_BITS;
     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
@@ -530,6 +539,7 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(RAMBlock 
*rb,
         clear_bit(next, bitmap);
         migration_dirty_pages--;
     }
+    *ram_addr_abs = next << TARGET_PAGE_BITS;
     return (next - base) << TARGET_PAGE_BITS;
 }
 
@@ -662,6 +672,24 @@ static int save_zero_page(QEMUFile *f, RAMBlock *block, 
ram_addr_t offset,
 }
 
 /**
+ * ram_find_block_by_id: Find a ramblock by name.
+ *
+ * Returns: The RAMBlock with matching ID, or NULL.
+ */
+static RAMBlock *ram_find_block_by_id(const char *id)
+{
+    RAMBlock *block;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        if (!strcmp(id, block->idstr)) {
+            return block;
+        }
+    }
+
+    return NULL;
+}
+
+/**
  * ram_save_page: Send the given page to the stream
  *
  * Returns: Number of pages written.
@@ -926,12 +954,15 @@ static int ram_save_compressed_page(QEMUFile *f, RAMBlock 
*block,
  * @f: Current migration stream.
  * @pss: Data about the state of the current dirty page scan.
  * @*again: Set to false if the search has scanned the whole of RAM
+ * *ram_addr_abs: Pointer into which to store the address of the dirty page
+ *               within the global ram_addr space
  */
 static bool find_dirty_block(QEMUFile *f, PageSearchStatus *pss,
-                             bool *again)
+                             bool *again, ram_addr_t *ram_addr_abs)
 {
     pss->offset = migration_bitmap_find_and_reset_dirty(pss->block,
-                                                       pss->offset);
+                                                       pss->offset,
+                                                       ram_addr_abs);
     if (pss->complete_round && pss->block == last_seen_block &&
         pss->offset >= last_offset) {
         /*
@@ -989,6 +1020,8 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,
     PageSearchStatus pss;
     int pages = 0;
     bool again, found;
+    ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
+                                 ram_addr_t space */
 
     pss.block = last_seen_block;
     pss.offset = last_offset;
@@ -999,7 +1032,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,
     }
 
     do {
-        found = find_dirty_block(f, &pss, &again);
+        found = find_dirty_block(f, &pss, &again, &dirty_ram_abs);
 
         if (found) {
             if (compression_switch && migrate_use_compression()) {
@@ -1013,7 +1046,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,
 
             /* if page is unmodified, continue to the next */
             if (pages > 0) {
+                MigrationState *ms = migrate_get_current();
                 last_sent_block = pss.block;
+                if (ms->sentmap) {
+                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
+                }
             }
         }
     } while (!pages && again);
@@ -1071,6 +1108,8 @@ void free_xbzrle_decoded_buf(void)
 
 static void migration_end(void)
 {
+    MigrationState *s = migrate_get_current();
+
     /* caller have hold iothread lock or is in a bh, so there is
      * no writing race against this migration_bitmap
      */
@@ -1082,6 +1121,9 @@ static void migration_end(void)
         g_free(bitmap);
     }
 
+    g_free(s->sentmap);
+    s->sentmap = NULL;
+
     XBZRLE_cache_lock();
     if (XBZRLE.cache) {
         cache_fini(XBZRLE.cache);
@@ -1174,6 +1216,166 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool 
expected)
     }
 }
 
+/* **** functions for postcopy ***** */
+
+/*
+ * Callback from postcopy_each_ram_send_discard for each RAMBlock
+ * start,end: Indexes into the bitmap for the first and last bit
+ *            representing the named block
+ */
+static int postcopy_send_discard_bm_ram(MigrationState *ms,
+                                        PostcopyDiscardState *pds,
+                                        unsigned long start, unsigned long end)
+{
+    unsigned long current;
+
+    for (current = start; current <= end; ) {
+        unsigned long set = find_next_bit(ms->sentmap, end + 1, current);
+
+        if (set <= end) {
+            unsigned long zero = find_next_zero_bit(ms->sentmap,
+                                                    end + 1, set + 1);
+
+            if (zero > end) {
+                zero = end + 1;
+            }
+            postcopy_discard_send_range(ms, pds, set, zero - 1);
+            current = zero + 1;
+        } else {
+            current = set;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Utility for the outgoing postcopy code.
+ *   Calls postcopy_send_discard_bm_ram for each RAMBlock
+ *   passing it bitmap indexes and name.
+ * Returns: 0 on success
+ * (qemu_ram_foreach_block ends up passing unscaled lengths
+ *  which would mean postcopy code would have to deal with target page)
+ */
+static int postcopy_each_ram_send_discard(MigrationState *ms)
+{
+    struct RAMBlock *block;
+    int ret;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        unsigned long first = block->offset >> TARGET_PAGE_BITS;
+        unsigned long last = (block->offset + (block->used_length - 1))
+                                >> TARGET_PAGE_BITS;
+        PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
+                                                               first,
+                                                               block->idstr);
+
+        /*
+         * Postcopy sends chunks of bitmap over the wire, but it
+         * just needs indexes at this point, avoids it having
+         * target page specific code.
+         */
+        ret = postcopy_send_discard_bm_ram(ms, pds, first, last);
+        postcopy_discard_send_finish(ms, pds);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Transmit the set of pages to be discarded after precopy to the target
+ * these are pages that:
+ *     a) Have been previously transmitted but are now dirty again
+ *     b) Pages that have never been transmitted, this ensures that
+ *        any pages on the destination that have been mapped by background
+ *        tasks get discarded (transparent huge pages is the specific concern)
+ * Hopefully this is pretty sparse
+ */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms)
+{
+    int ret;
+
+    rcu_read_lock();
+
+    /* This should be our last sync, the src is now paused */
+    migration_bitmap_sync();
+
+    /*
+     * Update the sentmap to be sentmap = ~sentmap | dirty
+     */
+    bitmap_complement(ms->sentmap, ms->sentmap,
+               last_ram_offset() >> TARGET_PAGE_BITS);
+
+    bitmap_or(ms->sentmap, ms->sentmap, migration_bitmap,
+               last_ram_offset() >> TARGET_PAGE_BITS);
+
+
+    trace_ram_postcopy_send_discard_bitmap();
+#ifdef DEBUG_POSTCOPY
+    ram_debug_dump_bitmap(ms->sentmap, false);
+#endif
+
+    ret = postcopy_each_ram_send_discard(ms);
+    rcu_read_unlock();
+
+    return ret;
+}
+
+/*
+ * At the start of the postcopy phase of migration, any now-dirty
+ * precopied pages are discarded.
+ *
+ * start, length describe a byte address range within the RAMBlock
+ *
+ * Returns 0 on success.
+ */
+int ram_discard_range(MigrationIncomingState *mis,
+                      const char *block_name,
+                      uint64_t start, size_t length)
+{
+    int ret = -1;
+
+    rcu_read_lock();
+    RAMBlock *rb = ram_find_block_by_id(block_name);
+
+    if (!rb) {
+        error_report("ram_discard_range: Failed to find block '%s'",
+                     block_name);
+        goto err;
+    }
+
+    uint8_t *host_startaddr = rb->host + start;
+
+    if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
+        error_report("ram_discard_range: Unaligned start address: %p",
+                     host_startaddr);
+        goto err;
+    }
+
+    if ((start + length) <= rb->used_length) {
+        uint8_t *host_endaddr = host_startaddr + length;
+        if ((uintptr_t)host_endaddr & (qemu_host_page_size - 1)) {
+            error_report("ram_discard_range: Unaligned end address: %p",
+                         host_endaddr);
+            goto err;
+        }
+        ret = postcopy_ram_discard_range(mis, host_startaddr, length);
+    } else {
+        error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
+                     "/%zu/%zu)",
+                     block_name, start, length, rb->used_length);
+    }
+
+err:
+    rcu_read_unlock();
+
+    return ret;
+}
+
+
 /* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
  * long-running RCU critical section.  When rcu-reclaims in the code
  * start to become numerous it will be necessary to reduce the
@@ -1232,6 +1434,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     migration_bitmap = bitmap_new(ram_bitmap_pages);
     bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
 
+    if (migrate_postcopy_ram()) {
+        MigrationState *s = migrate_get_current();
+        s->sentmap = bitmap_new(ram_bitmap_pages);
+        bitmap_clear(s->sentmap, 0, ram_bitmap_pages);
+    }
+
     /*
      * Count the total number of pages used by ram blocks not including any
      * gaps due to alignment or unplugs.
diff --git a/migration/savevm.c b/migration/savevm.c
index 52fca3c..85462b1 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1347,7 +1347,6 @@ static int 
loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
     }
     trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
     while (len) {
-        /* TODO - ram_discard_range gets added in a later patch
         uint64_t start_addr, block_length;
         start_addr = qemu_get_be64(mis->from_src_file);
         block_length = qemu_get_be64(mis->from_src_file);
@@ -1358,7 +1357,6 @@ static int 
loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
         if (ret) {
             return ret;
         }
-        */
     }
     trace_loadvm_postcopy_ram_handle_discard_end();
 
diff --git a/trace-events b/trace-events
index e68e69d..aa2d2e7 100644
--- a/trace-events
+++ b/trace-events
@@ -1247,6 +1247,7 @@ qemu_file_fclose(void) ""
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
 migration_throttle(void) ""
+ram_postcopy_send_discard_bitmap(void) ""
 
 # hw/display/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
@@ -1518,6 +1519,10 @@ rdma_start_incoming_migration_after_rdma_listen(void) ""
 rdma_start_outgoing_migration_after_rdma_connect(void) ""
 rdma_start_outgoing_migration_after_rdma_source_init(void) ""
 
+# migration/postcopy-ram.c
+postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s 
mask words sent=%d in %d commands"
+postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx"
+
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
 kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-- 
2.5.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]