qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v6 29/47] Postcopy: Maintain sentmap and calculate d


From: Dr. David Alan Gilbert (git)
Subject: [Qemu-devel] [PATCH v6 29/47] Postcopy: Maintain sentmap and calculate discard
Date: Tue, 14 Apr 2015 18:03:55 +0100

From: "Dr. David Alan Gilbert" <address@hidden>

Where postcopy is preceeded by a period of precopy, the destination will
have received pages that may have been dirtied on the source after the
page was sent.  The destination must throw these pages away before
starting it's CPUs.

Maintain a 'sentmap' of pages that have already been sent.
Calculate list of sent & dirty pages
Provide helpers on the destination side to discard these.

Signed-off-by: Dr. David Alan Gilbert <address@hidden>
---
 arch_init.c                      | 202 ++++++++++++++++++++++++++++++++++++++-
 include/migration/migration.h    |  12 +++
 include/migration/postcopy-ram.h |  35 +++++++
 include/qemu/typedefs.h          |   1 +
 migration/migration.c            |   1 +
 migration/postcopy-ram.c         | 108 +++++++++++++++++++++
 savevm.c                         |   2 -
 trace-events                     |   5 +
 8 files changed, 362 insertions(+), 4 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 0a49ace..efc2938 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -40,6 +40,7 @@
 #include "hw/audio/audio.h"
 #include "sysemu/kvm.h"
 #include "migration/migration.h"
+#include "migration/postcopy-ram.h"
 #include "hw/i386/smbios.h"
 #include "exec/address-spaces.h"
 #include "hw/audio/pcspk.h"
@@ -443,9 +444,17 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t 
**current_data,
     return 1;
 }
 
+/* mr: The region to search for dirty pages in
+ * start: Start address (typically so we can continue from previous page)
+ * ram_addr_abs: Pointer into which to store the address of the dirty page
+ *               within the global ram_addr space
+ *
+ * Returns: byte offset within memory region of the start of a dirty page
+ */
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
-                                                 ram_addr_t start)
+                                                 ram_addr_t start,
+                                                 ram_addr_t *ram_addr_abs)
 {
     unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
@@ -464,6 +473,7 @@ ram_addr_t 
migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
         clear_bit(next, migration_bitmap);
         migration_dirty_pages--;
     }
+    *ram_addr_abs = next << TARGET_PAGE_BITS;
     return (next - base) << TARGET_PAGE_BITS;
 }
 
@@ -603,6 +613,19 @@ static void migration_bitmap_sync(void)
     }
 }
 
+static RAMBlock *ram_find_block(const char *id)
+{
+    RAMBlock *block;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        if (!strcmp(id, block->idstr)) {
+            return block;
+        }
+    }
+
+    return NULL;
+}
+
 /**
  * ram_save_page: Send the given page to the stream
  *
@@ -713,13 +736,16 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,
     bool complete_round = false;
     int pages = 0;
     MemoryRegion *mr;
+    ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
+                                 ram_addr_t space */
 
     if (!block)
         block = QLIST_FIRST_RCU(&ram_list.blocks);
 
     while (true) {
         mr = block->mr;
-        offset = migration_bitmap_find_and_reset_dirty(mr, offset);
+        offset = migration_bitmap_find_and_reset_dirty(mr, offset,
+                                                       &dirty_ram_abs);
         if (complete_round && block == last_seen_block &&
             offset >= last_offset) {
             break;
@@ -738,6 +764,11 @@ static int ram_find_and_save_block(QEMUFile *f, bool 
last_stage,
 
             /* if page is unmodified, continue to the next */
             if (pages > 0) {
+                MigrationState *ms = migrate_get_current();
+                if (ms->sentmap) {
+                    set_bit(dirty_ram_abs >> TARGET_PAGE_BITS, ms->sentmap);
+                }
+
                 last_sent_block = block;
                 break;
             }
@@ -799,12 +830,19 @@ void free_xbzrle_decoded_buf(void)
 
 static void migration_end(void)
 {
+    MigrationState *s = migrate_get_current();
+
     if (migration_bitmap) {
         memory_global_dirty_log_stop();
         g_free(migration_bitmap);
         migration_bitmap = NULL;
     }
 
+    if (s->sentmap) {
+        g_free(s->sentmap);
+        s->sentmap = NULL;
+    }
+
     XBZRLE_cache_lock();
     if (XBZRLE.cache) {
         cache_fini(XBZRLE.cache);
@@ -878,6 +916,160 @@ void ram_debug_dump_bitmap(unsigned long *todump, bool 
expected)
     }
 }
 
+/* **** functions for postcopy ***** */
+
+/*
+ * Callback from postcopy_each_ram_send_discard for each RAMBlock
+ * start,end: Indexes into the bitmap for the first and last bit
+ *            representing the named block
+ */
+static int postcopy_send_discard_bm_ram(MigrationState *ms,
+                                        PostcopyDiscardState *pds,
+                                        unsigned long start, unsigned long end)
+{
+    unsigned long current;
+
+    for (current = start; current <= end; ) {
+        unsigned long set = find_next_bit(ms->sentmap, end + 1, current);
+
+        if (set <= end) {
+            unsigned long zero = find_next_zero_bit(ms->sentmap,
+                                                    end + 1, set + 1);
+
+            if (zero > end) {
+                zero = end + 1;
+            }
+            postcopy_discard_send_range(ms, pds, set, zero - 1);
+            current = zero + 1;
+        } else {
+            current = set;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Utility for the outgoing postcopy code.
+ *   Calls postcopy_send_discard_bm_ram for each RAMBlock
+ *   passing it bitmap indexes and name.
+ * Returns: 0 on success
+ * (qemu_ram_foreach_block ends up passing unscaled lengths
+ *  which would mean postcopy code would have to deal with target page)
+ */
+static int postcopy_each_ram_send_discard(MigrationState *ms)
+{
+    struct RAMBlock *block;
+    int ret;
+
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        unsigned long first = block->offset >> TARGET_PAGE_BITS;
+        unsigned long last = (block->offset + (block->max_length-1))
+                                >> TARGET_PAGE_BITS;
+        PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
+                                                               first,
+                                                               block->idstr);
+
+        /*
+         * Postcopy sends chunks of bitmap over the wire, but it
+         * just needs indexes at this point, avoids it having
+         * target page specific code.
+         */
+        ret = postcopy_send_discard_bm_ram(ms, pds, first, last);
+        postcopy_discard_send_finish(ms, pds);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Transmit the set of pages to be discarded after precopy to the target
+ * these are pages that have been sent previously but have been dirtied
+ * Hopefully this is pretty sparse
+ */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms)
+{
+    int ret;
+
+    rcu_read_lock();
+    /* This should be our last sync, the src is now paused */
+    migration_bitmap_sync();
+
+    /*
+     * Update the sentmap to be  sentmap&=dirty
+     */
+    bitmap_and(ms->sentmap, ms->sentmap, migration_bitmap,
+               last_ram_offset() >> TARGET_PAGE_BITS);
+
+
+    trace_ram_postcopy_send_discard_bitmap();
+#ifdef DEBUG_POSTCOPY
+    ram_debug_dump_bitmap(ms->sentmap, false);
+#endif
+
+    ret = postcopy_each_ram_send_discard(ms);
+    rcu_read_unlock();
+
+    return ret;
+}
+
+/*
+ * At the start of the postcopy phase of migration, any now-dirty
+ * precopied pages are discarded.
+ *
+ * start..end is an inclusive byte address range within the RAMBlock
+ *
+ * Returns 0 on success.
+ */
+int ram_discard_range(MigrationIncomingState *mis,
+                      const char *block_name,
+                      uint64_t start, uint64_t end)
+{
+    int ret = -1;
+
+    assert(end >= start);
+
+    rcu_read_lock();
+    RAMBlock *rb = ram_find_block(block_name);
+
+    if (!rb) {
+        error_report("ram_discard_range: Failed to find block '%s'",
+                     block_name);
+        goto err;
+    }
+
+    uint8_t *host_startaddr = rb->host + start;
+    uint8_t *host_endaddr;
+
+    if ((uintptr_t)host_startaddr & (qemu_host_page_size - 1)) {
+        error_report("ram_discard_range: Unaligned start address: %p",
+                     host_startaddr);
+        goto err;
+    }
+
+    if (end <= rb->used_length) {
+        host_endaddr   = rb->host + end;
+        if (((uintptr_t)host_endaddr + 1) & (qemu_host_page_size - 1)) {
+            error_report("ram_discard_range: Unaligned end address: %p",
+                         host_endaddr);
+            goto err;
+        }
+        ret = postcopy_ram_discard_range(mis, host_startaddr, host_endaddr);
+    } else {
+        error_report("ram_discard_range: Overrun block '%s' (%" PRIu64
+                     "/%" PRIu64 "/%zu)",
+                     block_name, start, end, rb->used_length);
+    }
+
+err:
+    rcu_read_unlock();
+
+    return ret;
+}
+
 static int ram_save_setup(QEMUFile *f, void *opaque)
 {
     RAMBlock *block;
@@ -929,6 +1121,12 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     migration_bitmap = bitmap_new(ram_bitmap_pages);
     bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
 
+    if (migrate_postcopy_ram()) {
+        MigrationState *s = migrate_get_current();
+        s->sentmap = bitmap_new(ram_bitmap_pages);
+        bitmap_clear(s->sentmap, 0, ram_bitmap_pages);
+    }
+
     /*
      * Count the total number of pages used by ram blocks not including any
      * gaps due to alignment or unplugs.
diff --git a/include/migration/migration.h b/include/migration/migration.h
index b9d028c..15707fc 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -113,6 +113,13 @@ struct MigrationState
 
     /* Flag set once the migration has been asked to enter postcopy */
     bool start_postcopy;
+
+    /* bitmap of pages that have been sent at least once
+     * only maintained and used in postcopy at the moment
+     * where it's used to send the dirtymap at the start
+     * of the postcopy phase
+     */
+    unsigned long *sentmap;
 };
 
 void process_incoming_migration(QEMUFile *f);
@@ -178,6 +185,11 @@ double xbzrle_mig_cache_miss_rate(void);
 
 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size);
 void ram_debug_dump_bitmap(unsigned long *todump, bool expected);
+/* For outgoing discard bitmap */
+int ram_postcopy_send_discard_bitmap(MigrationState *ms);
+/* For incoming postcopy discard */
+int ram_discard_range(MigrationIncomingState *mis, const char *block_name,
+                      uint64_t start, uint64_t end);
 
 /**
  * @migrate_add_blocker - prevent migration from proceeding
diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h
index d81934f..1d38f76 100644
--- a/include/migration/postcopy-ram.h
+++ b/include/migration/postcopy-ram.h
@@ -16,4 +16,39 @@
 /* Return true if the host supports everything we need to do postcopy-ram */
 bool postcopy_ram_supported_by_host(void);
 
+/*
+ * Discard the contents of memory start..end inclusive.
+ * We can assume that if we've been called postcopy_ram_hosttest returned true
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               uint8_t *end);
+
+
+/*
+ * Called at the start of each RAMBlock by the bitmap code
+ * 'offset' is the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * Returns a new PDS
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+                                                 unsigned long offset,
+                                                 const char *name);
+
+/*
+ * Called by the bitmap code for each chunk to discard
+ * May send a discard message, may just leave it queued to
+ * be sent later
+ * 'start' and 'end' describe an inclusive range of pages in the
+ * migration bitmap in the RAM block passed to postcopy_discard_send_init
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+                                unsigned long start, unsigned long end);
+
+/*
+ * Called at the end of each RAMBlock by the bitmap code
+ * Sends any outstanding discard messages, frees the PDS
+ */
+void postcopy_discard_send_finish(MigrationState *ms,
+                                  PostcopyDiscardState *pds);
+
 #endif
diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h
index 611db46..5f130fe 100644
--- a/include/qemu/typedefs.h
+++ b/include/qemu/typedefs.h
@@ -61,6 +61,7 @@ typedef struct PCIExpressHost PCIExpressHost;
 typedef struct PCIHostState PCIHostState;
 typedef struct PCMCIACardState PCMCIACardState;
 typedef struct PixelFormat PixelFormat;
+typedef struct PostcopyDiscardState PostcopyDiscardState;
 typedef struct PropertyInfo PropertyInfo;
 typedef struct Property Property;
 typedef struct QEMUBH QEMUBH;
diff --git a/migration/migration.c b/migration/migration.c
index d69e102..63205c3 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -22,6 +22,7 @@
 #include "block/block.h"
 #include "qemu/sockets.h"
 #include "migration/block.h"
+#include "migration/postcopy-ram.h"
 #include "qemu/thread.h"
 #include "qmp-commands.h"
 #include "trace.h"
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7704bc1..a10f3ca 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -27,6 +27,22 @@
 #include "qemu/error-report.h"
 #include "trace.h"
 
+#define MAX_DISCARDS_PER_COMMAND 12
+
+struct PostcopyDiscardState {
+    const char *name;
+    uint64_t offset; /* Bitmap entry for the 1st bit of this RAMBlock */
+    uint16_t cur_entry;
+    /*
+     * Start and end address of a discard range; end_list points to the byte
+     * after the end of the range.
+     */
+    uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
+    uint64_t   end_list[MAX_DISCARDS_PER_COMMAND];
+    unsigned int nsentwords;
+    unsigned int nsentcmds;
+};
+
 /* Postcopy needs to detect accesses to pages that haven't yet been copied
  * across, and efficiently map new pages in, the techniques for doing this
  * are target OS specific.
@@ -144,6 +160,22 @@ out:
     return ret;
 }
 
+/*
+ * Discard the contents of memory start..end inclusive.
+ * We can assume that if we've been called postcopy_ram_hosttest returned true
+ */
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               uint8_t *end)
+{
+    trace_postcopy_ram_discard_range(start, end);
+    if (madvise(start, (end-start)+1, MADV_DONTNEED)) {
+        error_report("%s MADV_DONTNEED: %s", __func__, strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
 #else
 /* No target OS support, stubs just fail */
 
@@ -153,5 +185,81 @@ bool postcopy_ram_supported_by_host(void)
     return false;
 }
 
+int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start,
+                               uint8_t *end)
+{
+    assert(0);
+}
 #endif
 
+/* ------------------------------------------------------------------------- */
+
+/*
+ * Called at the start of each RAMBlock by the bitmap code
+ * 'offset' is the bitmap offset of the named RAMBlock in the migration
+ * bitmap.
+ * Returns a new PDS
+ */
+PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
+                                                 unsigned long offset,
+                                                 const char *name)
+{
+    PostcopyDiscardState *res = g_try_malloc(sizeof(PostcopyDiscardState));
+
+    if (res) {
+        res->name = name;
+        res->cur_entry = 0;
+        res->nsentwords = 0;
+        res->nsentcmds = 0;
+        res->offset = offset;
+    }
+
+    return res;
+}
+
+/*
+ * Called by the bitmap code for each chunk to discard
+ * May send a discard message, may just leave it queued to
+ * be sent later
+ * 'start' and 'end' describe an inclusive range of pages in the
+ * migration bitmap in the RAM block passed to postcopy_discard_send_init
+ */
+void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
+                                unsigned long start, unsigned long end)
+{
+    size_t tp_bits = qemu_target_page_bits();
+    /* Convert to byte offsets within the RAM block */
+    pds->start_list[pds->cur_entry] = (start - pds->offset) << tp_bits;
+    pds->end_list[pds->cur_entry] = (1 + end - pds->offset) << tp_bits;
+    pds->cur_entry++;
+    pds->nsentwords++;
+
+    if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
+        /* Full set, ship it! */
+        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->name,
+                                              pds->cur_entry,
+                                              pds->start_list, pds->end_list);
+        pds->nsentcmds++;
+        pds->cur_entry = 0;
+    }
+}
+
+/*
+ * Called at the end of each RAMBlock by the bitmap code
+ * Sends any outstanding discard messages, frees the PDS
+ */
+void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState 
*pds)
+{
+    /* Anything unsent? */
+    if (pds->cur_entry) {
+        qemu_savevm_send_postcopy_ram_discard(ms->file, pds->name,
+                                              pds->cur_entry,
+                                              pds->start_list, pds->end_list);
+        pds->nsentcmds++;
+    }
+
+    trace_postcopy_discard_send_finish(pds->name, pds->nsentwords,
+                                       pds->nsentcmds);
+
+    g_free(pds);
+}
diff --git a/savevm.c b/savevm.c
index c2d6241..b7f0846 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1242,7 +1242,6 @@ static int 
loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
     }
     trace_loadvm_postcopy_ram_handle_discard_header(ramid, len);
     while (len) {
-        /* TODO - ram_discard_range gets added in a later patch
         uint64_t start_addr, end_addr;
         start_addr = qemu_get_be64(mis->file);
         end_addr = qemu_get_be64(mis->file);
@@ -1252,7 +1251,6 @@ static int 
loadvm_postcopy_ram_handle_discard(MigrationIncomingState *mis,
         if (ret) {
             return ret;
         }
-        */
     }
     trace_loadvm_postcopy_ram_handle_discard_end();
 
diff --git a/trace-events b/trace-events
index 8983bde..6ab309d 100644
--- a/trace-events
+++ b/trace-events
@@ -1219,6 +1219,7 @@ qemu_file_fclose(void) ""
 migration_bitmap_sync_start(void) ""
 migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
 migration_throttle(void) ""
+ram_postcopy_send_discard_bitmap(void) ""
 
 # hw/display/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
@@ -1479,6 +1480,10 @@ rdma_start_incoming_migration_after_rdma_listen(void) ""
 rdma_start_outgoing_migration_after_rdma_connect(void) ""
 rdma_start_outgoing_migration_after_rdma_source_init(void) ""
 
+# migration/postcopy-ram.c
+postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s 
mask words sent=%d in %d commands"
+postcopy_ram_discard_range(void *start, void *end) "%p,%p"
+
 # kvm-all.c
 kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"
 kvm_vm_ioctl(int type, void *arg) "type 0x%x, arg %p"
-- 
2.1.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]