qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] XBRLE page delta compression for live migration of


From: Shribman, Aidan
Subject: [Qemu-devel] [PATCH] XBRLE page delta compression for live migration of large memory apps
Date: Sun, 22 May 2011 14:00:39 +0200

Subject: [PATCH] XBRLE page delta compression for live migration of large 
memory apps
From: Aidan Shribman <address@hidden>

By using XBRLE (Xor Based Run-Length-Encoding) we can reduce required
bandwidth for transfering of dirty memory pages during live migration
        migrate_set_cachesize <size>
        migrate -x <url>
Qemu host: Ubuntu 10.10
Testing: live migration (w and w/o XBRLE) tested successfully.

Signed-off-by: Benoit Hudzia <address@hidden>
Signed-off-by: Petter Svard <address@hidden>
Signed-off-by: Aidan Shribman <address@hidden>

---

 arch_init.c       |  647 +++++++++++++++++++++++++++++++++++++++++++++++++----
 block-migration.c |    3 +-
 hmp-commands.hx   |   36 +++-
 hw/hw.h           |    3 +-
 migration-exec.c  |    6 +-
 migration-fd.c    |    6 +-
 migration-tcp.c   |    6 +-
 migration-unix.c  |    6 +-
 migration.c       |   33 +++-
 migration.h       |   23 ++-
 qmp-commands.hx   |   43 +++-
 savevm.c          |   13 +-
 sysemu.h          |    3 +-
 13 files changed, 749 insertions(+), 79 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 4486925..069cd67 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -27,6 +27,7 @@
 #include <sys/types.h>
 #include <sys/mman.h>
 #endif
+#include <assert.h>
 #include "config.h"
 #include "monitor.h"
 #include "sysemu.h"
@@ -41,6 +42,24 @@
 #include "gdbstub.h"
 #include "hw/smbios.h"

+//#define DEBUG_ARCH_INIT
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+//#define DEBUG_ARCH_INIT_CKSUM
+#ifdef DEBUG_ARCH_INIT_CKSUM
+#define PAGE_LOG(addr, pdata, fmt, ...) \
+    do { page_log(addr, pdata, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define PAGE_LOG(addr, pdata, fmt, ...) \
+    do { } while (0)
+#endif
+
 #ifdef TARGET_SPARC
 int graphic_width = 1024;
 int graphic_height = 768;
@@ -88,6 +107,402 @@ const uint32_t arch_type = QEMU_ARCH;
 #define RAM_SAVE_FLAG_PAGE     0x08
 #define RAM_SAVE_FLAG_EOS      0x10
 #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBRLE    0x40
+
+/***********************************************************/
+/* Page cache for storing previous pages as basis for XBRLE compression */
+#define CACHE_N_WAY 2 /* 2-way assossiative cache */
+
+typedef struct CacheItem {
+    ram_addr_t it_addr;
+    unsigned long it_age;
+    uint8_t *it_data;
+} CacheItem;
+
+typedef struct CacheBucket {
+    CacheItem bkt_item[CACHE_N_WAY];
+} CacheBucket;
+
+static CacheBucket *page_cache;
+static int64_t cache_num_buckets;
+static uint64_t cache_max_item_age;
+static int64_t cache_num_items;
+
+static void cache_init(ssize_t num_buckets);
+static void cache_fini(void);
+static int cache_is_cached(ram_addr_t addr);
+static int cache_get_oldest(CacheBucket *buck);
+static int cache_get_newest(CacheBucket *buck, ram_addr_t addr);
+static void cache_insert(ram_addr_t id, uint8_t *pdata);
+static unsigned long cache_get_cache_pos(ram_addr_t address);
+static CacheItem *cache_item_get(unsigned long pos, int item);
+
+/***********************************************************/
+/* RAM Migration State */
+typedef struct ArchMigrationState {
+    int use_xbrle;
+    int64_t xbrle_cache_size;
+} ArchMigrationState;
+
+static ArchMigrationState arch_mig_state;
+
+void arch_set_params(int blk_enable, int shared_base, int use_xbrle,
+        int64_t xbrle_cache_size, void *opaque)
+{
+    arch_mig_state.use_xbrle = use_xbrle;
+    arch_mig_state.xbrle_cache_size = xbrle_cache_size;
+}
+
+/***********************************************************/
+/* XBRLE (Xor Based Run-Length Encoding) */
+typedef struct XBRLEHeader {
+    uint8_t xh_flags;
+    uint16_t xh_len;
+    uint32_t xh_cksum;
+} XBRLEHeader;
+
+static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen);
+static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen);
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static uint32_t page_cksum(uint8_t *buf);
+static void page_log(ram_addr_t addr, uint8_t *pdata, const char *fmt, ...);
+#endif
+
+/***********************************************************/
+/* benchmarking */
+typedef struct BenchInfo {
+    uint64_t normal_pages;
+    uint64_t xbrle_pages;
+    uint64_t xbrle_bytes;
+    uint64_t xbrle_pages_aborted;
+    uint64_t dup_pages;
+    uint64_t iterations;
+} BenchInfo;
+
+static BenchInfo bench;
+
+/***********************************************************/
+/* XBRLE page cache implementation */
+static CacheItem *cache_item_get(unsigned long pos, int item)
+{
+    assert(page_cache);
+    return &page_cache[pos].bkt_item[item];
+}
+
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static int64_t cache_max_items(void)
+{
+    return cache_num_buckets * CACHE_N_WAY;
+}
+#endif /* DEBUG_ARCH_INIT_CKSUM */
+
+static void cache_init(int64_t num_bytes)
+{
+    int i;
+
+    cache_num_items = 0;
+    cache_max_item_age = 0;
+    cache_num_buckets = num_bytes / (TARGET_PAGE_SIZE * CACHE_N_WAY);
+    assert(cache_num_buckets);
+    DPRINTF("Setting cache buckets to %ld\n", cache_num_buckets);
+
+    assert(!page_cache);
+    page_cache = (CacheBucket *)qemu_mallocz((cache_num_buckets) *
+            sizeof(CacheBucket));
+
+    for (i = 0; i < cache_num_buckets; i++) {
+        int j;
+        for (j = 0; j < CACHE_N_WAY; j++) {
+            CacheItem *it = cache_item_get(i, j);
+            it->it_data = NULL;
+            it->it_age = 0;
+            it->it_addr = -1;
+        }
+    }
+}
+
+static void cache_fini(void)
+{
+    int i;
+
+    assert(page_cache);
+
+    for (i = 0; i < cache_num_buckets; i++) {
+        int j;
+        for (j = 0; j < CACHE_N_WAY; j++) {
+            CacheItem *it = cache_item_get(i, j);
+            qemu_free(it->it_data);
+            it->it_data = 0;
+        }
+    }
+
+    qemu_free(page_cache);
+    page_cache = NULL;
+}
+
+static unsigned long cache_get_cache_pos(ram_addr_t address)
+{
+    unsigned long pos;
+
+    assert(cache_num_buckets);
+    pos = (address/TARGET_PAGE_SIZE) & (cache_num_buckets - 1);
+    return pos;
+}
+
+static int cache_get_newest(CacheBucket *buck, ram_addr_t addr)
+{
+    unsigned long big = 0;
+    int big_pos = -1;
+    int j;
+
+    assert(page_cache);
+
+    for (j = 0; j < CACHE_N_WAY; j++) {
+        CacheItem *it = &buck->bkt_item[j];
+
+        if (it->it_addr != addr) {
+            continue;
+        }
+
+        if (!j || it->it_age > big) {
+            big = it->it_age;
+            big_pos = j;
+        }
+    }
+
+    return big_pos;
+}
+
+static int cache_get_oldest(CacheBucket *buck)
+{
+    unsigned long small = 0;
+    int small_pos = -1;
+    int j;
+
+    assert(page_cache);
+
+    for (j = 0; j < CACHE_N_WAY; j++) {
+        CacheItem *it = &buck->bkt_item[j];
+
+        if (!j || it->it_age <  small) {
+            small = it->it_age;
+            small_pos = j;
+        }
+    }
+
+    return small_pos;
+}
+
+static int cache_is_cached(ram_addr_t addr)
+{
+    unsigned long pos = cache_get_cache_pos(addr);
+
+    assert(page_cache);
+    CacheBucket *bucket = &page_cache[pos];
+    return cache_get_newest(bucket, addr);
+}
+
+static void cache_insert(unsigned long addr, uint8_t *pdata)
+{
+    unsigned long pos;
+    int slot = -1;
+    CacheBucket *bucket;
+
+    pos = cache_get_cache_pos(addr);
+    assert(page_cache);
+    bucket = &page_cache[pos];
+    slot = cache_get_oldest(bucket); /* evict LRU */
+
+    /* actual update of entry */
+    CacheItem *it = cache_item_get(pos, slot);
+    if (!it->it_data) {
+        cache_num_items++;
+    }
+    qemu_free(it->it_data);
+    it->it_data = pdata;
+    it->it_age = ++cache_max_item_age;
+    it->it_addr = addr;
+}
+
+/* XBRLE (Xor Based Run-Length Encoding) */
+static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int d = 0, ch_run = 0, i;
+    uint8_t prev, ch;
+
+    for (i = 0; i <= slen; i++) {
+        if (i != slen) {
+            ch = src[i];
+        }
+
+        if (!i || (i != slen && ch == prev && ch_run < 255)) {
+            ch_run++;
+        } else {
+            if (d+2 > dlen)
+                return -1;
+            *dst++ = ch_run;
+            *dst++ = prev;
+            d += 2;
+            ch_run = 1;
+        }
+
+        prev = ch;
+    }
+    return d;
+}
+
+static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int d = 0, s;
+
+    for (s = 0; s < slen-1; s += 2) {
+        uint8_t ch_run = src[s];
+        uint8_t ch = src[s+1];
+        while (ch_run--) {
+            if (d == dlen) {
+                return -1;
+            }
+            dst[d] = ch;
+            d++;
+        }
+    }
+    return d;
+}
+
+#define PAGE_SAMPLE_PERCENT 0.02
+#define PAGE_SAMPLE_SIZE (TARGET_PAGE_SIZE * PAGE_SAMPLE_PERCENT)
+#define BYTES_CHANGED_PERCENT 0.30
+
+static int is_page_good_for_xbrle(uint8_t *old, uint8_t *new)
+{
+    int i, bytes_changed = 0;
+
+    srand(time(NULL)+getpid()+getpid()*987654+rand());
+
+    for (i = 0; i < PAGE_SAMPLE_SIZE; i++) {
+        unsigned long pos = (int) (rand() * TARGET_PAGE_SIZE / (RAND_MAX+1.0));
+
+         if (old[pos] != new[pos]) {
+             bytes_changed++;
+         }
+    }
+
+    return (((float) bytes_changed) / PAGE_SAMPLE_SIZE) < 
BYTES_CHANGED_PERCENT;
+}
+
+static void xor_encode(uint8_t *dst, uint8_t *src1, uint8_t *src2)
+{
+    int i;
+
+    for (i = 0; i < TARGET_PAGE_SIZE; i++) {
+        dst[i] = src1[i] ^ src2[i];
+    }
+}
+
+static void save_block_hdr(QEMUFile *f,
+        RAMBlock *block, ram_addr_t offset, int cont, int flag)
+{
+        qemu_put_be64(f, offset | cont | flag);
+        if (!cont) {
+                qemu_put_byte(f, strlen(block->idstr));
+                qemu_put_buffer(f, (uint8_t *)block->idstr,
+                                strlen(block->idstr));
+        }
+}
+
+#define ENCODING_FLAG_XBRLE 0x1
+#define ENCODING_FLAG_CKSUM 0x2
+
+static int save_xbrle_page(QEMUFile *f, uint8_t *current_data,
+        ram_addr_t current_addr, RAMBlock *block, ram_addr_t offset, int cont)
+{
+    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
+    XBRLEHeader hdr = {0};
+    CacheItem *it;
+    uint8_t *xor_buf = NULL, *xbrle_buf = NULL;
+
+    /* get location */
+    slot = cache_is_cached(current_addr);
+    if (slot == -1) {
+        goto done;
+    }
+    cache_location = cache_get_cache_pos(current_addr);
+
+    /* abort if page changed too much */
+    it = cache_item_get(cache_location, slot);
+    if (!is_page_good_for_xbrle(it->it_data, current_data)) {
+        DPRINTF("Page changed too much! Aborting XBRLE.\n");
+        bench.xbrle_pages_aborted++;
+        goto done;
+    }
+
+    /* XOR encoding */
+    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    xor_encode(xor_buf, it->it_data, current_data);
+
+    /* XBRLE (XOR+RLE) encoding */
+    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    encoded_len = rle_encode(xor_buf, TARGET_PAGE_SIZE, xbrle_buf,
+            TARGET_PAGE_SIZE);
+
+    if (encoded_len < 0) {
+        DPRINTF("XBRLE encoding oeverflow - sending uncompressed\n");
+        goto done;
+    }
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBRLE;
+#ifdef DEBUG_ARCH_INIT_CKSUM
+    hdr.xh_cksum = page_cksum(current_data);
+    hdr.xh_flags |= ENCODING_FLAG_CKSUM;
+#endif
+
+    /* Send XBRLE compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBRLE);
+    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    qemu_put_buffer(f, xbrle_buf, encoded_len);
+    PAGE_LOG(current_addr, current_data, "XBRLE page (enc len %d)\n",
+            encoded_len);
+    bench.xbrle_pages++;
+    bytes_sent = encoded_len + sizeof(hdr);
+    bench.xbrle_bytes += bytes_sent;
+
+done:
+    qemu_free(xor_buf);
+    qemu_free(xbrle_buf);
+    return bytes_sent;
+}
+
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static uint32_t page_cksum(uint8_t *buf)
+{
+    uint32_t res = 0;
+    int i;
+
+    for (i = 0; i < TARGET_PAGE_SIZE; i++) {
+        res += buf[i];
+    }
+
+    return res;
+}
+
+static void page_log(ram_addr_t addr, uint8_t *pdata, const char *fmt, ...)
+{
+    va_list arg_ptr;
+    static FILE *fp;
+    static uint32_t page_seq;
+
+    va_start(arg_ptr, fmt);
+    if (!fp) {
+        fp = fopen("mig.log", "w");
+    }
+    page_seq++;
+    fprintf(fp, "[seq %d addr 0x%lX cksum 0x%X] ", page_seq,
+            (unsigned long) addr, page_cksum(pdata));
+    vfprintf(fp, fmt, arg_ptr);
+    va_end(arg_ptr);
+}
+#endif /* DEBUG_ARCH_INIT_CKSUM */

 static int is_dup_page(uint8_t *page, uint8_t ch)
 {
@@ -107,7 +522,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
 static RAMBlock *last_block;
 static ram_addr_t last_offset;

-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
 {
     RAMBlock *block = last_block;
     ram_addr_t offset = last_offset;
@@ -128,28 +543,32 @@ static int ram_save_block(QEMUFile *f)
                                             current_addr + TARGET_PAGE_SIZE,
                                             MIGRATION_DIRTY_FLAG);

-            p = block->host + offset;
+            p = qemu_mallocz(TARGET_PAGE_SIZE);
+            memcpy(p, block->host + offset, TARGET_PAGE_SIZE);

             if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
-                }
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, *p);
                 bytes_sent = 1;
-            } else {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
+                bench.dup_pages++;
+                PAGE_LOG(current_addr, p, "DUP page\n");
+            } else if (stage == 2 && arch_mig_state.use_xbrle) {
+                bytes_sent = save_xbrle_page(f, p, current_addr, block,
+                    offset, cont);
+                if (bytes_sent) {
+                    cache_insert(current_addr, p);
                 }
+            }
+            if (!bytes_sent) {
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                 bytes_sent = TARGET_PAGE_SIZE;
+                bench.normal_pages++;
+                PAGE_LOG(current_addr, p, "NORMAL page\n");
+                if (arch_mig_state.use_xbrle) {
+                    cache_insert(current_addr, p);
+                }
             }
-
             break;
         }

@@ -212,6 +631,55 @@ uint64_t ram_bytes_total(void)
     return total;
 }

+#ifdef DEBUG_ARCH_INIT
+static void dump_percentage(const char *label, unsigned long absolute,
+        unsigned long total)
+{
+    printf("%s: %ld (%0.2f%%)\n", label, absolute,
+            (total ? (100.0 * absolute / total) : 0));
+}
+
+static void dump_migration_statistics(void)
+{
+    unsigned long normal_bytes = bench.normal_pages * TARGET_PAGE_SIZE;
+    unsigned long total_pages = bench.normal_pages + bench.xbrle_pages
+        + bench.dup_pages;
+    unsigned long total_bytes = normal_bytes + bench.xbrle_bytes
+        + bench.dup_pages;
+
+    printf("\n");
+    printf("=====================================================\n");
+    printf("Save VM Memory Statistics (SUCCESS or FAILURE):\n");
+    printf("Iterations: %ld\n", bench.iterations);
+
+    dump_percentage("Normal pages", bench.normal_pages, total_pages);
+    dump_percentage("Normal bytes", normal_bytes, total_bytes);
+
+    dump_percentage("Dup pages", bench.dup_pages, total_pages);
+    dump_percentage("Dup bytes", bench.dup_pages, total_bytes);
+
+    if (arch_mig_state.use_xbrle) {
+        dump_percentage("XBRLE pages", bench.xbrle_pages, total_pages);
+        dump_percentage("XBRLE bytes", bench.xbrle_bytes, total_bytes);
+        dump_percentage("Aborted XBRLE pages from XBRLE",
+            bench.xbrle_pages_aborted,
+            bench.xbrle_pages + bench.xbrle_pages_aborted);
+    }
+
+    dump_percentage("Total pages", total_pages, total_pages);
+    dump_percentage("Total bytes", total_bytes, total_bytes);
+
+    if (arch_mig_state.use_xbrle) {
+        printf("Cache number of inserts: %ld\n", cache_max_item_age);
+        printf("Cache max items: %ld\n", cache_max_items());
+        dump_percentage("Cache number of items", cache_num_items,
+            cache_max_items());
+    }
+
+    printf("=====================================================\n");
+}
+#endif /* DEBUG_ARCH_INIT_CKSUM */
+
 int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
 {
     ram_addr_t addr;
@@ -235,6 +703,10 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, 
void *opaque)
         last_block = NULL;
         last_offset = 0;

+        if (arch_mig_state.use_xbrle) {
+            cache_init(arch_mig_state.xbrle_cache_size);
+        }
+
         /* Make sure all dirty bits are set */
         QLIST_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
@@ -264,8 +736,9 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, 
void *opaque)
     while (!qemu_file_rate_limit(f)) {
         int bytes_sent;

-        bytes_sent = ram_save_block(f);
+        bytes_sent = ram_save_block(f, stage);
         bytes_transferred += bytes_sent;
+        bench.iterations++;
         if (bytes_sent == 0) { /* no more blocks */
             break;
         }
@@ -285,19 +758,83 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, 
void *opaque)
         int bytes_sent;

         /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage))) {
             bytes_transferred += bytes_sent;
         }
         cpu_physical_memory_set_dirty_tracking(0);
+        if (arch_mig_state.use_xbrle) {
+            cache_fini();
+#ifdef DEBUG_ARCH_INIT
+            dump_migration_statistics();
+#endif
+        }
     }

     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;

+    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
+        migrate_max_downtime());
+
     return (stage == 2) && (expected_time <= migrate_max_downtime());
 }

+static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = -1;
+    uint8_t *prev_page, *xor_buf, *xbrle_buf;
+    XBRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
+    if (!(hdr.xh_flags & ENCODING_FLAG_XBRLE)) {
+        fprintf(stderr, "Failed to load XBRLE page - wrong compression!\n");
+        goto done;
+    }
+
+    if (hdr.xh_len > TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBRLE page - len overflow!\n");
+        goto done;
+    }
+
+    /* load data and decode */
+    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    ret = rle_decode(xbrle_buf, hdr.xh_len, xor_buf, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBRLE page - decode error!\n");
+        goto done;
+    }
+
+    if (ret != TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBRLE page - size %d expected %d!\n",
+            ret, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    /* decode XOR delta */
+    prev_page = host;
+    xor_encode(prev_page, prev_page, xor_buf);
+#ifdef DEBUG_ARCH_INIT_CKSUM
+    if (hdr.xh_flags & ENCODING_FLAG_CKSUM &&
+            hdr.xh_cksum != page_cksum(prev_page)) {
+        fprintf(stderr, "Failed to load XBRLE page - bad checksum!\n");
+        goto done;
+    }
+#endif
+
+    PAGE_LOG(addr, prev_page, "XBRLE page (enc len %d)\n", hdr.xh_len);
+    rc = 0;
+done:
+    qemu_free(xor_buf);
+    qemu_free(xbrle_buf);
+    return rc;
+}
+
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -328,16 +865,38 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     return NULL;
 }

+static inline void *host_from_stream_offset_versioned(int version_id,
+        QEMUFile *f, ram_addr_t offset, int flags)
+{
+        void *host;
+        if (version_id == 3) {
+                host = qemu_get_ram_ptr(offset);
+        } else {
+                host = host_from_stream_offset(f, offset, flags);
+        }
+        if (!host) {
+            fprintf(stderr, "Failed to convert RAM address to host"
+                    " for offset 0x%lX!\n", offset);
+            abort();
+        }
+        return host;
+}
+
 int ram_load(QEMUFile *f, void *opaque, int version_id)
 {
     ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
+    static uint64_t seq_iter;
+
+    seq_iter++;

     if (version_id < 3 || version_id > 4) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto done;
     }

     do {
+        void *host;
         addr = qemu_get_be64(f);

         flags = addr & ~TARGET_PAGE_MASK;
@@ -346,7 +905,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
         if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
             if (version_id == 3) {
                 if (addr != ram_bytes_total()) {
-                    return -EINVAL;
+                    ret = -EINVAL;
+                    goto done;
                 }
             } else {
                 /* Synchronize RAM block list */
@@ -365,8 +925,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)

                     QLIST_FOREACH(block, &ram_list.blocks, next) {
                         if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret = -EINVAL;
+                                goto done;
+                            }
                             break;
                         }
                     }
@@ -374,7 +936,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
                     if (!block) {
                         fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                 "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                     }

                     total_ram_bytes -= length;
@@ -383,17 +946,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
         }

         if (flags & RAM_SAVE_FLAG_COMPRESS) {
-            void *host;
             uint8_t ch;

-            if (version_id == 3)
-                host = qemu_get_ram_ptr(addr);
-            else
-                host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                return -EINVAL;
-            }
-
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
             ch = qemu_get_byte(f);
             memset(host, ch, TARGET_PAGE_SIZE);
 #ifndef _WIN32
@@ -402,22 +958,31 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
                 qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
             }
 #endif
+            PAGE_LOG(addr, host, "DUP page\n");
         } else if (flags & RAM_SAVE_FLAG_PAGE) {
-            void *host;
-
-            if (version_id == 3)
-                host = qemu_get_ram_ptr(addr);
-            else
-                host = host_from_stream_offset(f, addr, flags);
-
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            PAGE_LOG(addr, host, "NORMAL page\n");
+        } else if (flags & RAM_SAVE_FLAG_XBRLE) {
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
+            if (load_xbrle(f, addr, host) < 0) {
+                ret = -EINVAL;
+                goto done;
+            }
         }
+
         if (qemu_file_has_error(f)) {
-            return -EIO;
+            ret = -EIO;
+            goto done;
         }
     } while (!(flags & RAM_SAVE_FLAG_EOS));

-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
 }

 void qemu_service_io(void)
diff --git a/block-migration.c b/block-migration.c
index 3e66f49..004fc12 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -689,7 +689,8 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
     return 0;
 }

-static void block_set_params(int blk_enable, int shared_base, void *opaque)
+static void block_set_params(int blk_enable, int shared_base,
+       int use_xbrle, int64_t xbrle_cache_size, void *opaque)
 {
     block_mig_state.blk_enable = blk_enable;
     block_mig_state.shared_base = shared_base;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index e5585ba..e49d5be 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -717,24 +717,27 @@ ETEXI

     {
         .name       = "migrate",
-        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
-        .params     = "[-d] [-b] [-i] uri",
-        .help       = "migrate to URI (using -d to not wait for completion)"
-                     "\n\t\t\t -b for migration without shared storage with"
-                     " full copy of disk\n\t\t\t -i for migration without "
-                     "shared storage with incremental copy of disk "
-                     "(base image shared between src and destination)",
+        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+        .params     = "[-d] [-b] [-i] [-x] uri",
+        .help       = "migrate to URI"
+                      "\n\t -d to not wait for completion"
+                      "\n\t -b for migration without shared storage with"
+                      " full copy of disk"
+                      "\n\t -i for migration without"
+                      " shared storage with incremental copy of disk"
+                      " (base image shared between source and destination)"
+                      "\n\t -x to use XBRLE page delta compression",
         .user_print = monitor_user_noop,
        .mhandler.cmd_new = do_migrate,
     },

-
 STEXI
address@hidden migrate [-d] [-b] [-i] @var{uri}
address@hidden migrate [-d] [-b] [-i] [-x] @var{uri}
 @findex migrate
 Migrate to @var{uri} (using -d to not wait for completion).
        -b for migration with full copy of disk
        -i for migration with incremental copy of disk (base image is shared)
+    -x to use XBRLE page delta compression
 ETEXI

     {
@@ -753,10 +756,23 @@ Cancel the current VM migration.
 ETEXI

     {
+        .name       = "migrate_set_cachesize",
+        .args_type  = "value:s",
+        .params     = "value",
+        .help       = "set cache size (in MB) for XBRLE migrations",
+        .mhandler.cmd = do_migrate_set_cachesize,
+    },
+
+STEXI
address@hidden migrate_set_cachesize @var{value}
+Set cache size (in MB) for xbrle migrations.
+ETEXI
+
+    {
         .name       = "migrate_set_speed",
         .args_type  = "value:o",
         .params     = "value",
-        .help       = "set maximum speed (in bytes) for migrations. "
+        .help       = "set maximum XBRLE cache size (in bytes) for migrations. 
"
        "Defaults to MB if no size suffix is specified, ie. B/K/M/G/T",
         .user_print = monitor_user_noop,
         .mhandler.cmd_new = do_migrate_set_speed,
diff --git a/hw/hw.h b/hw/hw.h
index 9d2cfc2..de9e5a6 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -239,7 +239,8 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
 int64_t qemu_ftell(QEMUFile *f);
 int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);

-typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque);
+typedef void SaveSetParamsHandler(int blk_enable, int shared,
+       int use_xbrle, int64_t xbrle_cache_size, void * opaque);
 typedef void SaveStateHandler(QEMUFile *f, void *opaque);
 typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f, int stage,
                                  void *opaque);
diff --git a/migration-exec.c b/migration-exec.c
index 14718dd..fe8254a 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -67,7 +67,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                             int inc)
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size)
 {
     FdMigrationState *s;
     FILE *f;
@@ -99,6 +101,8 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,

     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-fd.c b/migration-fd.c
index 6d14505..4a1ddbd 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -56,7 +56,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
                                            int64_t bandwidth_limit,
                                            int detach,
                                            int blk,
-                                           int inc)
+                        int inc,
+                        int use_xbrle,
+                        int64_t xbrle_cache_size)
 {
     FdMigrationState *s;

@@ -82,6 +84,8 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,

     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-tcp.c b/migration-tcp.c
index b55f419..4ca5bf6 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -81,7 +81,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                             int blk,
-                                            int inc)
+                         int inc,
+                         int use_xbrle,
+                         int64_t xbrle_cache_size)
 {
     struct sockaddr_in addr;
     FdMigrationState *s;
@@ -101,6 +103,8 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,

     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration-unix.c b/migration-unix.c
index 57232c0..0813902 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -80,7 +80,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                             int inc)
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size)
 {
     FdMigrationState *s;
     struct sockaddr_un addr;
@@ -100,6 +102,8 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,

     s->mig_state.blk = blk;
     s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

     s->state = MIG_STATE_ACTIVE;
     s->mon = NULL;
diff --git a/migration.c b/migration.c
index 9ee8b17..b5b530b 100644
--- a/migration.c
+++ b/migration.c
@@ -34,6 +34,9 @@
 /* Migration speed throttling */
 static uint32_t max_throttle = (32 << 20);

+/* Migration XBRLE cache size */
+static int64_t migrate_cache_size = 0x8000000; /* 256 MB size */
+
 static MigrationState *current_migration;

 int qemu_start_incoming_migration(const char *uri)
@@ -80,6 +83,7 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject 
**ret_data)
     int detach = qdict_get_try_bool(qdict, "detach", 0);
     int blk = qdict_get_try_bool(qdict, "blk", 0);
     int inc = qdict_get_try_bool(qdict, "inc", 0);
+    int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0);
     const char *uri = qdict_get_str(qdict, "uri");

     if (current_migration &&
@@ -90,17 +94,21 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject 
**ret_data)

     if (strstart(uri, "tcp:", &p)) {
         s = tcp_start_outgoing_migration(mon, p, max_throttle, detach,
-                                         blk, inc);
+                                         blk, inc, use_xbrle,
+                                         migrate_cache_size);
 #if !defined(WIN32)
     } else if (strstart(uri, "exec:", &p)) {
         s = exec_start_outgoing_migration(mon, p, max_throttle, detach,
-                                          blk, inc);
+                                          blk, inc, use_xbrle,
+                                          migrate_cache_size);
     } else if (strstart(uri, "unix:", &p)) {
         s = unix_start_outgoing_migration(mon, p, max_throttle, detach,
-                                          blk, inc);
+                                          blk, inc, use_xbrle,
+                                          migrate_cache_size);
     } else if (strstart(uri, "fd:", &p)) {
         s = fd_start_outgoing_migration(mon, p, max_throttle, detach,
-                                        blk, inc);
+                                        blk, inc, use_xbrle,
+                                        migrate_cache_size);
 #endif
     } else {
         monitor_printf(mon, "unknown migration protocol: %s\n", uri);
@@ -341,7 +349,8 @@ void migrate_fd_connect(FdMigrationState *s)

     DPRINTF("beginning savevm\n");
     ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
-                                  s->mig_state.shared);
+                                  s->mig_state.shared, s->mig_state.use_xbrle,
+                                  s->mig_state.xbrle_cache_size);
     if (ret < 0) {
         DPRINTF("failed, %d\n", ret);
         migrate_fd_error(s);
@@ -448,3 +457,17 @@ int migrate_fd_close(void *opaque)
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
     return s->close(s);
 }
+
+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict)
+{
+    ssize_t bytes;
+    char *ptr;
+    const char *value = qdict_get_str(qdict, "value");
+
+    bytes = strtod(value, &ptr);
+    if (bytes > 0) {
+        migrate_cache_size = bytes;
+        monitor_printf(mon, "Cache size set to %ld bytes\n", bytes);
+    }
+}
+
diff --git a/migration.h b/migration.h
index d13ed4f..62be2f8 100644
--- a/migration.h
+++ b/migration.h
@@ -32,6 +32,8 @@ struct MigrationState
     void (*release)(MigrationState *s);
     int blk;
     int shared;
+    int use_xbrle;
+    int64_t xbrle_cache_size;
 };

 typedef struct FdMigrationState FdMigrationState;
@@ -76,7 +78,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                             int inc);
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size);

 int tcp_start_incoming_migration(const char *host_port);

@@ -85,7 +89,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                             int64_t bandwidth_limit,
                                             int detach,
                                             int blk,
-                                            int inc);
+                         int inc,
+                         int use_xbrle,
+                         int64_t xbrle_cache_size);

 int unix_start_incoming_migration(const char *path);

@@ -94,7 +100,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                             int inc);
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size);

 int fd_start_incoming_migration(const char *path);

@@ -103,7 +111,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
                                            int64_t bandwidth_limit,
                                            int detach,
                                            int blk,
-                                           int inc);
+                        int inc,
+                        int use_xbrle,
+                        int64_t xbrle_cache_size);

 void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon);

@@ -134,4 +144,9 @@ static inline FdMigrationState 
*migrate_to_fms(MigrationState *mig_state)
     return container_of(mig_state, FdMigrationState, mig_state);
 }

+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict);
+
+void arch_set_params(int blk_enable, int shared_base,
+        int use_xbrle, int64_t xbrle_cache_size, void *opaque);
+
 #endif
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 793cf1c..8fbe64b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -431,13 +431,16 @@ EQMP

     {
         .name       = "migrate",
-        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
-        .params     = "[-d] [-b] [-i] uri",
-        .help       = "migrate to URI (using -d to not wait for completion)"
-                     "\n\t\t\t -b for migration without shared storage with"
-                     " full copy of disk\n\t\t\t -i for migration without "
-                     "shared storage with incremental copy of disk "
-                     "(base image shared between src and destination)",
+        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+        .params     = "[-d] [-b] [-i] [-x] uri",
+        .help       = "migrate to URI"
+                      "\n\t -d to not wait for completion"
+                      "\n\t -b for migration without shared storage with"
+                      " full copy of disk"
+                      "\n\t -i for migration without"
+                      " shared storage with incremental copy of disk"
+                      " (base image shared between source and destination)"
+                      "\n\t -x to use XBRLE page delta compression",
         .user_print = monitor_user_noop,
        .mhandler.cmd_new = do_migrate,
     },
@@ -453,6 +456,7 @@ Arguments:
 - "blk": block migration, full disk copy (json-bool, optional)
 - "inc": incremental disk copy (json-bool, optional)
 - "uri": Destination URI (json-string)
+- "xbrle": to use XBRLE page delta compression

 Example:

@@ -494,6 +498,31 @@ Example:
 EQMP

     {
+        .name       = "migrate_set_cachesize",
+        .args_type  = "value:s",
+        .params     = "value",
+        .help       = "set cache size (in MB) for xbrle migrations",
+        .mhandler.cmd = do_migrate_set_cachesize,
+    },
+
+SQMP
+migrate_set_cachesize
+---------------------
+
+Set cache size to be used by XBRLE migration
+
+Arguments:
+
+- "value": cache size in bytes (json-number)
+
+Example:
+
+-> { "execute": "migrate_set_cachesize", "arguments": { "value": 500M } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "migrate_set_speed",
         .args_type  = "value:f",
         .params     = "value",
diff --git a/savevm.c b/savevm.c
index 4e49765..93b512b 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1141,7 +1141,8 @@ int register_savevm(DeviceState *dev,
                     void *opaque)
 {
     return register_savevm_live(dev, idstr, instance_id, version_id,
-                                NULL, NULL, save_state, load_state, opaque);
+                                arch_set_params, NULL, save_state,
+                                load_state, opaque);
 }

 void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
@@ -1428,15 +1429,17 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se)
 #define QEMU_VM_SUBSECTION           0x05

 int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-                            int shared)
+                            int shared, int use_xbrle,
+                            int64_t xbrle_cache_size)
 {
     SaveStateEntry *se;

     QTAILQ_FOREACH(se, &savevm_handlers, entry) {
         if(se->set_params == NULL) {
             continue;
-       }
-       se->set_params(blk_enable, shared, se->opaque);
+        }
+        se->set_params(blk_enable, shared, use_xbrle, xbrle_cache_size,
+                se->opaque);
     }

     qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
@@ -1577,7 +1580,7 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f)

     bdrv_flush_all();

-    ret = qemu_savevm_state_begin(mon, f, 0, 0);
+    ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0);
     if (ret < 0)
         goto out;

diff --git a/sysemu.h b/sysemu.h
index b81a70e..15a0664 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -74,7 +74,8 @@ void qemu_announce_self(void);
 void main_loop_wait(int nonblocking);

 int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-                            int shared);
+                            int shared, int use_xbrle,
+                            int64_t xbrle_cache_size);
 int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f);
 int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
 void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);





reply via email to

[Prev in Thread] Current Thread [Next in Thread]