qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH RFC 1/2] migration: implement checkpoint loading


From: Bohdan Trach
Subject: [Qemu-devel] [PATCH RFC 1/2] migration: implement checkpoint loading
Date: Tue, 24 Nov 2015 17:42:28 +0100

This commit adds functions used to open the checkpoint saved by the
dump-guest-memory command and populate the hash table used by the
checkpoint-assisted migration mechanism. SHA256 is used to checksum
the pages. Only the ELF memory dump format is supported at the moment.

Signed-off-by: Bohdan Trach <address@hidden>
---
 include/migration/migration.h |   4 ++
 migration/ram.c               | 157 ++++++++++++++++++++++++++++++++++++++++++
 qemu-options.hx               |   9 +++
 trace-events                  |   3 +
 vl.c                          |   9 +++
 5 files changed, 182 insertions(+)

diff --git a/include/migration/migration.h b/include/migration/migration.h
index fd018b7..4904c85 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -321,4 +321,8 @@ int ram_save_queue_pages(MigrationState *ms, const char *rbname,
 PostcopyState postcopy_state_get(void);
 /* Set the state and return the old state */
 PostcopyState postcopy_state_set(PostcopyState new_state);
+
+void allocate_checksum_table(void);
+void init_checksum_lookup_table(const char *checkpoint_path);
+extern const char *checkpoint_path;
 #endif
diff --git a/migration/ram.c b/migration/ram.c
index 1eb155a..379a381 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -27,6 +27,7 @@
  */
 #include <stdint.h>
 #include <zlib.h>
+#include <elf.h>
 #include "qemu/bitops.h"
 #include "qemu/bitmap.h"
 #include "qemu/timer.h"
@@ -39,6 +40,7 @@
 #include "trace.h"
 #include "exec/ram_addr.h"
 #include "qemu/rcu_queue.h"
+#include "crypto/hash.h"
 
 #ifdef DEBUG_MIGRATION_RAM
 #define DPRINTF(fmt, ...) \
@@ -48,6 +50,159 @@
     do { } while (0)
 #endif
 
+#define SHA256_DIGEST_LENGTH 32
+static int fd_checkpoint = -1;
+/* indexed by page number */
+static uint64_t hashes_size = 0;
+static uint64_t hashes_entries = 0;
+static uint8_t *hashes = 0;
+
+typedef struct  {
+    uint8_t hash[SHA256_DIGEST_LENGTH];
+    uint64_t offset;
+} hash_offset_entry;
+
+static uint64_t hash_offset_entries = 0;
+static uint64_t max_hash_offset_entries;
+static hash_offset_entry* hash_offset_array = 0;
+static uint8_t all_zeroes_hash[SHA256_DIGEST_LENGTH];
+
+static inline void SHA256(void *data, size_t data_len, void* digest)
+{
+    uint8_t *out = NULL;
+    size_t rlen = 0;
+    qcrypto_hash_bytes(QCRYPTO_HASH_ALG_SHA256, data, data_len, &out, &rlen, NULL);
+    assert(rlen == SHA256_DIGEST_LENGTH);
+    memcpy(digest, out, rlen);
+    g_free(out);
+}
+
+static char* sha256s(const uint8_t *digest) {
+    /* SHA256 is 32 bytes, i.e., 64 hexadecimal digits. + 1 for trailing \0. */
+    static const size_t size = 64 + 1;
+    static char hex_digits[64 + 1];
+    int digit;
+
+    for (digit = 0; digit < 64; digit += 2) {
+        snprintf(hex_digits+digit, 3, "%02x", digest[digit/2]);
+    }
+
+    hex_digits[size-1] = '\0';
+    return hex_digits;
+}
+
+static int uint256_compare(const void* x, const void* y)
+{
+    return memcmp(x, y, SHA256_DIGEST_LENGTH);
+}
+
+static int cmp_hash_offset_entry(const void* a, const void* b) {
+    hash_offset_entry* e = (hash_offset_entry*) a;
+    hash_offset_entry* f = (hash_offset_entry*) b;
+
+    return memcmp(e->hash, f->hash, SHA256_DIGEST_LENGTH);
+}
+
+void allocate_checksum_table(void) {
+    RAMBlock *block;
+    size_t sz = 0;
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        sz += block->used_length;
+    }
+
+    max_hash_offset_entries = hashes_entries = (sz / TARGET_PAGE_SIZE);
+    trace_allocate_checksum_table(hashes_entries);
+    hashes_size = hashes_entries * SHA256_DIGEST_LENGTH;
+
+    hashes = g_try_malloc0(hashes_size);
+    if (!hashes) {
+        error_report("Error allocating hashes");
+        return;
+    }
+
+    uint8_t all_zeroes[TARGET_PAGE_SIZE];
+    bzero(all_zeroes, TARGET_PAGE_SIZE);
+    SHA256(all_zeroes, TARGET_PAGE_SIZE, all_zeroes_hash);
+
+    hash_offset_array = g_try_malloc0(max_hash_offset_entries * sizeof(hash_offset_entry));
+    if (!hash_offset_array) {
+        error_report("Error allocating hash_offset_array");
+        return;
+    }
+}
+
+/* phdr.p_offset + phdr.p_memsz is the beginning of the dumped memory */
+static off_t seek_elf64(int f)
+{
+    Elf64_Ehdr elf;
+    Elf64_Phdr phdr;
+    off_t off;
+
+    assert(sizeof(elf) == read(f, &elf, sizeof(elf)));
+    assert(sizeof(phdr) == read(f, &phdr, sizeof(phdr)));
+    off = lseek(f, phdr.p_offset + phdr.p_memsz, SEEK_SET);
+    return off;
+}
+
+static off_t seek_elf32(int f)
+{
+    Elf32_Ehdr elf;
+    Elf32_Phdr phdr;
+    off_t off;
+
+    assert(sizeof(elf) == read(f, &elf, sizeof(elf)));
+    assert(sizeof(phdr) == read(f, &phdr, sizeof(phdr)));
+    off = lseek(f, phdr.p_offset + phdr.p_memsz, SEEK_SET);
+    return off;
+}
+
+static off_t seek_to_memory(int checkpoint_fd)
+{
+    char ident[16];
+    assert(16 == read(checkpoint_fd, ident, sizeof(ident)));
+    /* seek_elf* expect zero offset */
+    lseek(checkpoint_fd, 0, SEEK_SET);
+    if (ident[EI_CLASS] == ELFCLASS64) {
+        return seek_elf64(checkpoint_fd);
+    } else {
+        return seek_elf32(checkpoint_fd);
+    }
+}
+
+void init_checksum_lookup_table(const char *checkpoint_path)
+{
+    ssize_t rc;
+    uint8_t* pg;
+    struct stat sb;
+    uint64_t idx;
+
+    trace_init_checksum_lookup_table_start(ram_size);
+
+    rc = stat(checkpoint_path, &sb);
+    if (rc == -1 && errno == ENOENT) return;
+    assert(rc == 0);
+
+    pg = g_malloc0(TARGET_PAGE_SIZE);
+    fd_checkpoint = qemu_open(checkpoint_path, O_RDONLY);
+    assert(fd_checkpoint != -1);
+
+    for (idx = seek_to_memory(fd_checkpoint); idx < sb.st_size;
+         idx += TARGET_PAGE_SIZE) {
+        rc = read(fd_checkpoint, pg, TARGET_PAGE_SIZE);
+        assert(rc == TARGET_PAGE_SIZE);
+        assert(hash_offset_entries < max_hash_offset_entries);
+        SHA256(pg, TARGET_PAGE_SIZE, hash_offset_array[hash_offset_entries].hash);
+        hash_offset_array[hash_offset_entries].offset = idx;
+        trace_init_checksum_lookup_table_hash(
+            sha256s(hash_offset_array[hash_offset_entries].hash),
+            hash_offset_array[hash_offset_entries].offset);
+        hash_offset_entries++;
+    };
+
+    qsort(hash_offset_array, hash_offset_entries, sizeof(hash_offset_entry),
+          cmp_hash_offset_entry);
+    g_free(pg);
+}
 static int dirty_rate_high_cnt;
 
 static uint64_t bitmap_sync_count;
@@ -1874,6 +2029,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     migration_bitmap_sync_init();
     qemu_mutex_init(&migration_bitmap_mutex);
 
+    qsort(hashes, hashes_entries, SHA256_DIGEST_LENGTH, uint256_compare);
+
     if (migrate_use_xbzrle()) {
         XBZRLE_cache_lock();
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
diff --git a/qemu-options.hx b/qemu-options.hx
index 0eea4ee..1913375 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3557,6 +3557,15 @@ Dump json-encoded vmstate information for current machine type to file
 in @var{file}
 ETEXI
 
+DEF("checkpoint", HAS_ARG, QEMU_OPTION_checkpoint,
+    "-checkpoint file         path to checkpoint file\n", QEMU_ARCH_ALL)
+STEXI
+@item -checkpoint @var{path}
+@findex -checkpoint
+Checkpoint file to use during incoming migrations. Reduces network
+traffic and total migration time.
+ETEXI
+
 DEFHEADING(Generic object creation)
 
 DEF("object", HAS_ARG, QEMU_OPTION_object,
diff --git a/trace-events b/trace-events
index 0b0ff02..eee060b 100644
--- a/trace-events
+++ b/trace-events
@@ -1264,6 +1264,9 @@ migration_throttle(void) ""
 ram_load_postcopy_loop(uint64_t addr, int flags) "@%" PRIx64 " %x"
 ram_postcopy_send_discard_bitmap(void) ""
 ram_save_queue_pages(const char *rbname, size_t start, size_t len) "%s: start: %zx len: %zx"
+allocate_checksum_table(uint64_t  npages) "pages=%" PRIu64
+init_checksum_lookup_table_start(uint64_t ram_size) "ram_size=%" PRIu64
+init_checksum_lookup_table_hash(const char* hash, uint64_t offset) "hash=%s offset=%" PRIu64
 
 # hw/display/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
diff --git a/vl.c b/vl.c
index 525929b..2dfac86 100644
--- a/vl.c
+++ b/vl.c
@@ -138,6 +138,7 @@ int display_opengl;
 static int display_remote;
 const char* keyboard_layout = NULL;
 ram_addr_t ram_size;
+const char *checkpoint_path = NULL;
 const char *mem_path = NULL;
 int mem_prealloc = 0; /* force preallocation of physical target memory */
 bool enable_mlock = false;
@@ -3355,6 +3356,9 @@ int main(int argc, char **argv, char **envp)
                 }
                 break;
 #endif
+            case QEMU_OPTION_checkpoint:
+                checkpoint_path = optarg;
+                break;
             case QEMU_OPTION_mempath:
                 mem_path = optarg;
                 break;
@@ -4653,6 +4657,7 @@ int main(int argc, char **argv, char **envp)
         }
     }
 
+    allocate_checksum_table();
     qdev_prop_check_globals();
     if (vmstate_dump_file) {
         /* dump and exit */
@@ -4662,6 +4667,10 @@ int main(int argc, char **argv, char **envp)
 
     if (incoming) {
         Error *local_err = NULL;
+        if (checkpoint_path) {
+            init_checksum_lookup_table(checkpoint_path);
+        }
+
         qemu_start_incoming_migration(incoming, &local_err);
         if (local_err) {
             error_report("-incoming %s: %s", incoming,
-- 
2.4.10




reply via email to

[Prev in Thread] Current Thread [Next in Thread]