qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] XBRLE page delta compression for live migration


From: Anthony Liguori
Subject: Re: [Qemu-devel] [PATCH] XBRLE page delta compression for live migration of large memory apps
Date: Wed, 22 Jun 2011 07:01:55 -0500
User-agent: Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.17) Gecko/20110424 Lightning/1.0b2 Thunderbird/3.1.10

On 05/22/2011 07:00 AM, Shribman, Aidan wrote:
Subject: [PATCH] XBRLE page delta compression for live migration of large 
memory apps
From: Aidan Shribman<address@hidden>

By using XBRLE (Xor Based Run-Length-Encoding) we can reduce required
bandwidth for transferring of dirty memory pages during live migration
         migrate_set_cachesize<size>
         migrate -x<url>
Qemu host: Ubuntu 10.10
Testing: live migration (w and w/o XBRLE) tested successfully.

By how much?

This is a change to the live migration protocol; it would also require documentation and an understanding of how it affects compatibility.

The patch really needs to be split into logical pieces too. It's a bit too big for a meaningful review.

Regards,

Anthony Liguori


Signed-off-by: Benoit Hudzia<address@hidden>
Signed-off-by: Petter Svard<address@hidden>
Signed-off-by: Aidan Shribman<address@hidden>

---

  arch_init.c       |  647 +++++++++++++++++++++++++++++++++++++++++++++++++----
  block-migration.c |    3 +-
  hmp-commands.hx   |   36 +++-
  hw/hw.h           |    3 +-
  migration-exec.c  |    6 +-
  migration-fd.c    |    6 +-
  migration-tcp.c   |    6 +-
  migration-unix.c  |    6 +-
  migration.c       |   33 +++-
  migration.h       |   23 ++-
  qmp-commands.hx   |   43 +++-
  savevm.c          |   13 +-
  sysemu.h          |    3 +-
  13 files changed, 749 insertions(+), 79 deletions(-)

diff --git a/arch_init.c b/arch_init.c
index 4486925..069cd67 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -27,6 +27,7 @@
  #include<sys/types.h>
  #include<sys/mman.h>
  #endif
+#include<assert.h>
  #include "config.h"
  #include "monitor.h"
  #include "sysemu.h"
@@ -41,6 +42,24 @@
  #include "gdbstub.h"
  #include "hw/smbios.h"

+//#define DEBUG_ARCH_INIT
+#ifdef DEBUG_ARCH_INIT
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+//#define DEBUG_ARCH_INIT_CKSUM
+#ifdef DEBUG_ARCH_INIT_CKSUM
+#define PAGE_LOG(addr, pdata, fmt, ...) \
+    do { page_log(addr, pdata, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define PAGE_LOG(addr, pdata, fmt, ...) \
+    do { } while (0)
+#endif
+
  #ifdef TARGET_SPARC
  int graphic_width = 1024;
  int graphic_height = 768;
@@ -88,6 +107,402 @@ const uint32_t arch_type = QEMU_ARCH;
  #define RAM_SAVE_FLAG_PAGE     0x08
  #define RAM_SAVE_FLAG_EOS      0x10
  #define RAM_SAVE_FLAG_CONTINUE 0x20
+#define RAM_SAVE_FLAG_XBRLE    0x40
+
+/***********************************************************/
+/* Page cache for storing previous pages as basis for XBRLE compression */
+#define CACHE_N_WAY 2 /* 2-way associative cache */
+
+typedef struct CacheItem {
+    ram_addr_t it_addr;
+    unsigned long it_age;
+    uint8_t *it_data;
+} CacheItem;
+
+typedef struct CacheBucket {
+    CacheItem bkt_item[CACHE_N_WAY];
+} CacheBucket;
+
+static CacheBucket *page_cache;
+static int64_t cache_num_buckets;
+static uint64_t cache_max_item_age;
+static int64_t cache_num_items;
+
+static void cache_init(ssize_t num_buckets);
+static void cache_fini(void);
+static int cache_is_cached(ram_addr_t addr);
+static int cache_get_oldest(CacheBucket *buck);
+static int cache_get_newest(CacheBucket *buck, ram_addr_t addr);
+static void cache_insert(ram_addr_t id, uint8_t *pdata);
+static unsigned long cache_get_cache_pos(ram_addr_t address);
+static CacheItem *cache_item_get(unsigned long pos, int item);
+
+/***********************************************************/
+/* RAM Migration State */
+typedef struct ArchMigrationState {
+    int use_xbrle;
+    int64_t xbrle_cache_size;
+} ArchMigrationState;
+
+static ArchMigrationState arch_mig_state;
+
+void arch_set_params(int blk_enable, int shared_base, int use_xbrle,
+        int64_t xbrle_cache_size, void *opaque)
+{
+    arch_mig_state.use_xbrle = use_xbrle;
+    arch_mig_state.xbrle_cache_size = xbrle_cache_size;
+}
+
+/***********************************************************/
+/* XBRLE (Xor Based Run-Length Encoding) */
+typedef struct XBRLEHeader {
+    uint8_t xh_flags;
+    uint16_t xh_len;
+    uint32_t xh_cksum;
+} XBRLEHeader;
+
+static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen);
+static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen);
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static uint32_t page_cksum(uint8_t *buf);
+static void page_log(ram_addr_t addr, uint8_t *pdata, const char *fmt, ...);
+#endif
+
+/***********************************************************/
+/* benchmarking */
+typedef struct BenchInfo {
+    uint64_t normal_pages;
+    uint64_t xbrle_pages;
+    uint64_t xbrle_bytes;
+    uint64_t xbrle_pages_aborted;
+    uint64_t dup_pages;
+    uint64_t iterations;
+} BenchInfo;
+
+static BenchInfo bench;
+
+/***********************************************************/
+/* XBRLE page cache implementation */
+static CacheItem *cache_item_get(unsigned long pos, int item)
+{
+    assert(page_cache);
+    return&page_cache[pos].bkt_item[item];
+}
+
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static int64_t cache_max_items(void)
+{
+    return cache_num_buckets * CACHE_N_WAY;
+}
+#endif /* DEBUG_ARCH_INIT_CKSUM */
+
+static void cache_init(int64_t num_bytes)
+{
+    int i;
+
+    cache_num_items = 0;
+    cache_max_item_age = 0;
+    cache_num_buckets = num_bytes / (TARGET_PAGE_SIZE * CACHE_N_WAY);
+    assert(cache_num_buckets);
+    DPRINTF("Setting cache buckets to %ld\n", cache_num_buckets);
+
+    assert(!page_cache);
+    page_cache = (CacheBucket *)qemu_mallocz((cache_num_buckets) *
+            sizeof(CacheBucket));
+
+    for (i = 0; i<  cache_num_buckets; i++) {
+        int j;
+        for (j = 0; j<  CACHE_N_WAY; j++) {
+            CacheItem *it = cache_item_get(i, j);
+            it->it_data = NULL;
+            it->it_age = 0;
+            it->it_addr = -1;
+        }
+    }
+}
+
+static void cache_fini(void)
+{
+    int i;
+
+    assert(page_cache);
+
+    for (i = 0; i<  cache_num_buckets; i++) {
+        int j;
+        for (j = 0; j<  CACHE_N_WAY; j++) {
+            CacheItem *it = cache_item_get(i, j);
+            qemu_free(it->it_data);
+            it->it_data = 0;
+        }
+    }
+
+    qemu_free(page_cache);
+    page_cache = NULL;
+}
+
+static unsigned long cache_get_cache_pos(ram_addr_t address)
+{
+    unsigned long pos;
+
+    assert(cache_num_buckets);
+    pos = (address/TARGET_PAGE_SIZE)&  (cache_num_buckets - 1);
+    return pos;
+}
+
+static int cache_get_newest(CacheBucket *buck, ram_addr_t addr)
+{
+    unsigned long big = 0;
+    int big_pos = -1;
+    int j;
+
+    assert(page_cache);
+
+    for (j = 0; j<  CACHE_N_WAY; j++) {
+        CacheItem *it =&buck->bkt_item[j];
+
+        if (it->it_addr != addr) {
+            continue;
+        }
+
+        if (!j || it->it_age>  big) {
+            big = it->it_age;
+            big_pos = j;
+        }
+    }
+
+    return big_pos;
+}
+
+static int cache_get_oldest(CacheBucket *buck)
+{
+    unsigned long small = 0;
+    int small_pos = -1;
+    int j;
+
+    assert(page_cache);
+
+    for (j = 0; j<  CACHE_N_WAY; j++) {
+        CacheItem *it =&buck->bkt_item[j];
+
+        if (!j || it->it_age<   small) {
+            small = it->it_age;
+            small_pos = j;
+        }
+    }
+
+    return small_pos;
+}
+
+static int cache_is_cached(ram_addr_t addr)
+{
+    unsigned long pos = cache_get_cache_pos(addr);
+
+    assert(page_cache);
+    CacheBucket *bucket =&page_cache[pos];
+    return cache_get_newest(bucket, addr);
+}
+
+static void cache_insert(unsigned long addr, uint8_t *pdata)
+{
+    unsigned long pos;
+    int slot = -1;
+    CacheBucket *bucket;
+
+    pos = cache_get_cache_pos(addr);
+    assert(page_cache);
+    bucket =&page_cache[pos];
+    slot = cache_get_oldest(bucket); /* evict LRU */
+
+    /* actual update of entry */
+    CacheItem *it = cache_item_get(pos, slot);
+    if (!it->it_data) {
+        cache_num_items++;
+    }
+    qemu_free(it->it_data);
+    it->it_data = pdata;
+    it->it_age = ++cache_max_item_age;
+    it->it_addr = addr;
+}
+
+/* XBRLE (Xor Based Run-Length Encoding) */
+static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int d = 0, ch_run = 0, i;
+    uint8_t prev, ch;
+
+    for (i = 0; i<= slen; i++) {
+        if (i != slen) {
+            ch = src[i];
+        }
+
+        if (!i || (i != slen&&  ch == prev&&  ch_run<  255)) {
+            ch_run++;
+        } else {
+            if (d+2>  dlen)
+                return -1;
+            *dst++ = ch_run;
+            *dst++ = prev;
+            d += 2;
+            ch_run = 1;
+        }
+
+        prev = ch;
+    }
+    return d;
+}
+
+static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen)
+{
+    int d = 0, s;
+
+    for (s = 0; s<  slen-1; s += 2) {
+        uint8_t ch_run = src[s];
+        uint8_t ch = src[s+1];
+        while (ch_run--) {
+            if (d == dlen) {
+                return -1;
+            }
+            dst[d] = ch;
+            d++;
+        }
+    }
+    return d;
+}
+
+#define PAGE_SAMPLE_PERCENT 0.02
+#define PAGE_SAMPLE_SIZE (TARGET_PAGE_SIZE * PAGE_SAMPLE_PERCENT)
+#define BYTES_CHANGED_PERCENT 0.30
+
+static int is_page_good_for_xbrle(uint8_t *old, uint8_t *new)
+{
+    int i, bytes_changed = 0;
+
+    srand(time(NULL)+getpid()+getpid()*987654+rand());
+
+    for (i = 0; i<  PAGE_SAMPLE_SIZE; i++) {
+        unsigned long pos = (int) (rand() * TARGET_PAGE_SIZE / (RAND_MAX+1.0));
+
+         if (old[pos] != new[pos]) {
+             bytes_changed++;
+         }
+    }
+
+    return (((float) bytes_changed) / PAGE_SAMPLE_SIZE)<  
BYTES_CHANGED_PERCENT;
+}
+
+static void xor_encode(uint8_t *dst, uint8_t *src1, uint8_t *src2)
+{
+    int i;
+
+    for (i = 0; i<  TARGET_PAGE_SIZE; i++) {
+        dst[i] = src1[i] ^ src2[i];
+    }
+}
+
+static void save_block_hdr(QEMUFile *f,
+        RAMBlock *block, ram_addr_t offset, int cont, int flag)
+{
+        qemu_put_be64(f, offset | cont | flag);
+        if (!cont) {
+                qemu_put_byte(f, strlen(block->idstr));
+                qemu_put_buffer(f, (uint8_t *)block->idstr,
+                                strlen(block->idstr));
+        }
+}
+
+#define ENCODING_FLAG_XBRLE 0x1
+#define ENCODING_FLAG_CKSUM 0x2
+
+static int save_xbrle_page(QEMUFile *f, uint8_t *current_data,
+        ram_addr_t current_addr, RAMBlock *block, ram_addr_t offset, int cont)
+{
+    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
+    XBRLEHeader hdr = {0};
+    CacheItem *it;
+    uint8_t *xor_buf = NULL, *xbrle_buf = NULL;
+
+    /* get location */
+    slot = cache_is_cached(current_addr);
+    if (slot == -1) {
+        goto done;
+    }
+    cache_location = cache_get_cache_pos(current_addr);
+
+    /* abort if page changed too much */
+    it = cache_item_get(cache_location, slot);
+    if (!is_page_good_for_xbrle(it->it_data, current_data)) {
+        DPRINTF("Page changed too much! Aborting XBRLE.\n");
+        bench.xbrle_pages_aborted++;
+        goto done;
+    }
+
+    /* XOR encoding */
+    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    xor_encode(xor_buf, it->it_data, current_data);
+
+    /* XBRLE (XOR+RLE) encoding */
+    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    encoded_len = rle_encode(xor_buf, TARGET_PAGE_SIZE, xbrle_buf,
+            TARGET_PAGE_SIZE);
+
+    if (encoded_len<  0) {
+        DPRINTF("XBRLE encoding oeverflow - sending uncompressed\n");
+        goto done;
+    }
+
+    hdr.xh_len = encoded_len;
+    hdr.xh_flags |= ENCODING_FLAG_XBRLE;
+#ifdef DEBUG_ARCH_INIT_CKSUM
+    hdr.xh_cksum = page_cksum(current_data);
+    hdr.xh_flags |= ENCODING_FLAG_CKSUM;
+#endif
+
+    /* Send XBRLE compressed page */
+    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBRLE);
+    qemu_put_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
+    qemu_put_buffer(f, xbrle_buf, encoded_len);
+    PAGE_LOG(current_addr, current_data, "XBRLE page (enc len %d)\n",
+            encoded_len);
+    bench.xbrle_pages++;
+    bytes_sent = encoded_len + sizeof(hdr);
+    bench.xbrle_bytes += bytes_sent;
+
+done:
+    qemu_free(xor_buf);
+    qemu_free(xbrle_buf);
+    return bytes_sent;
+}
+
+#ifdef DEBUG_ARCH_INIT_CKSUM
+static uint32_t page_cksum(uint8_t *buf)
+{
+    uint32_t res = 0;
+    int i;
+
+    for (i = 0; i<  TARGET_PAGE_SIZE; i++) {
+        res += buf[i];
+    }
+
+    return res;
+}
+
+static void page_log(ram_addr_t addr, uint8_t *pdata, const char *fmt, ...)
+{
+    va_list arg_ptr;
+    static FILE *fp;
+    static uint32_t page_seq;
+
+    va_start(arg_ptr, fmt);
+    if (!fp) {
+        fp = fopen("mig.log", "w");
+    }
+    page_seq++;
+    fprintf(fp, "[seq %d addr 0x%lX cksum 0x%X] ", page_seq,
+            (unsigned long) addr, page_cksum(pdata));
+    vfprintf(fp, fmt, arg_ptr);
+    va_end(arg_ptr);
+}
+#endif /* DEBUG_ARCH_INIT_CKSUM */

  static int is_dup_page(uint8_t *page, uint8_t ch)
  {
@@ -107,7 +522,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
  static RAMBlock *last_block;
  static ram_addr_t last_offset;

-static int ram_save_block(QEMUFile *f)
+static int ram_save_block(QEMUFile *f, int stage)
  {
      RAMBlock *block = last_block;
      ram_addr_t offset = last_offset;
@@ -128,28 +543,32 @@ static int ram_save_block(QEMUFile *f)
                                              current_addr + TARGET_PAGE_SIZE,
                                              MIGRATION_DIRTY_FLAG);

-            p = block->host + offset;
+            p = qemu_mallocz(TARGET_PAGE_SIZE);
+            memcpy(p, block->host + offset, TARGET_PAGE_SIZE);

              if (is_dup_page(p, *p)) {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_COMPRESS);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
-                }
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
                  qemu_put_byte(f, *p);
                  bytes_sent = 1;
-            } else {
-                qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
-                if (!cont) {
-                    qemu_put_byte(f, strlen(block->idstr));
-                    qemu_put_buffer(f, (uint8_t *)block->idstr,
-                                    strlen(block->idstr));
+                bench.dup_pages++;
+                PAGE_LOG(current_addr, p, "DUP page\n");
+            } else if (stage == 2&&  arch_mig_state.use_xbrle) {
+                bytes_sent = save_xbrle_page(f, p, current_addr, block,
+                    offset, cont);
+                if (bytes_sent) {
+                    cache_insert(current_addr, p);
                  }
+            }
+            if (!bytes_sent) {
+                save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
                  qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
                  bytes_sent = TARGET_PAGE_SIZE;
+                bench.normal_pages++;
+                PAGE_LOG(current_addr, p, "NORMAL page\n");
+                if (arch_mig_state.use_xbrle) {
+                    cache_insert(current_addr, p);
+                }
              }
-
              break;
          }

@@ -212,6 +631,55 @@ uint64_t ram_bytes_total(void)
      return total;
  }

+#ifdef DEBUG_ARCH_INIT
+static void dump_percentage(const char *label, unsigned long absolute,
+        unsigned long total)
+{
+    printf("%s: %ld (%0.2f%%)\n", label, absolute,
+            (total ? (100.0 * absolute / total) : 0));
+}
+
+static void dump_migration_statistics(void)
+{
+    unsigned long normal_bytes = bench.normal_pages * TARGET_PAGE_SIZE;
+    unsigned long total_pages = bench.normal_pages + bench.xbrle_pages
+        + bench.dup_pages;
+    unsigned long total_bytes = normal_bytes + bench.xbrle_bytes
+        + bench.dup_pages;
+
+    printf("\n");
+    printf("=====================================================\n");
+    printf("Save VM Memory Statistics (SUCCESS or FAILURE):\n");
+    printf("Iterations: %ld\n", bench.iterations);
+
+    dump_percentage("Normal pages", bench.normal_pages, total_pages);
+    dump_percentage("Normal bytes", normal_bytes, total_bytes);
+
+    dump_percentage("Dup pages", bench.dup_pages, total_pages);
+    dump_percentage("Dup bytes", bench.dup_pages, total_bytes);
+
+    if (arch_mig_state.use_xbrle) {
+        dump_percentage("XBRLE pages", bench.xbrle_pages, total_pages);
+        dump_percentage("XBRLE bytes", bench.xbrle_bytes, total_bytes);
+        dump_percentage("Aborted XBRLE pages from XBRLE",
+            bench.xbrle_pages_aborted,
+            bench.xbrle_pages + bench.xbrle_pages_aborted);
+    }
+
+    dump_percentage("Total pages", total_pages, total_pages);
+    dump_percentage("Total bytes", total_bytes, total_bytes);
+
+    if (arch_mig_state.use_xbrle) {
+        printf("Cache number of inserts: %ld\n", cache_max_item_age);
+        printf("Cache max items: %ld\n", cache_max_items());
+        dump_percentage("Cache number of items", cache_num_items,
+            cache_max_items());
+    }
+
+    printf("=====================================================\n");
+}
+#endif /* DEBUG_ARCH_INIT */
+
  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
  {
      ram_addr_t addr;
@@ -235,6 +703,10 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, 
void *opaque)
          last_block = NULL;
          last_offset = 0;

+        if (arch_mig_state.use_xbrle) {
+            cache_init(arch_mig_state.xbrle_cache_size);
+        }
+
          /* Make sure all dirty bits are set */
          QLIST_FOREACH(block,&ram_list.blocks, next) {
              for (addr = block->offset; addr<  block->offset + block->length;
@@ -264,8 +736,9 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, 
void *opaque)
      while (!qemu_file_rate_limit(f)) {
          int bytes_sent;

-        bytes_sent = ram_save_block(f);
+        bytes_sent = ram_save_block(f, stage);
          bytes_transferred += bytes_sent;
+        bench.iterations++;
          if (bytes_sent == 0) { /* no more blocks */
              break;
          }
@@ -285,19 +758,83 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage, 
void *opaque)
          int bytes_sent;

          /* flush all remaining blocks regardless of rate limiting */
-        while ((bytes_sent = ram_save_block(f)) != 0) {
+        while ((bytes_sent = ram_save_block(f, stage))) {
              bytes_transferred += bytes_sent;
          }
          cpu_physical_memory_set_dirty_tracking(0);
+        if (arch_mig_state.use_xbrle) {
+            cache_fini();
+#ifdef DEBUG_ARCH_INIT
+            dump_migration_statistics();
+#endif
+        }
      }

      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);

      expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;

+    DPRINTF("ram_save_live: expected(%ld)<= max(%ld)?\n", expected_time,
+        migrate_max_downtime());
+
      return (stage == 2)&&  (expected_time<= migrate_max_downtime());
  }

+static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host)
+{
+    int ret, rc = -1;
+    uint8_t *prev_page, *xor_buf, *xbrle_buf;
+    XBRLEHeader hdr = {0};
+
+    /* extract RLE header */
+    qemu_get_buffer(f, (uint8_t *)&hdr, sizeof(hdr));
+    if (!(hdr.xh_flags&  ENCODING_FLAG_XBRLE)) {
+        fprintf(stderr, "Failed to load XBRLE page - wrong compression!\n");
+        goto done;
+    }
+
+    if (hdr.xh_len>  TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBRLE page - len overflow!\n");
+        goto done;
+    }
+
+    /* load data and decode */
+    xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    qemu_get_buffer(f, xbrle_buf, hdr.xh_len);
+
+    /* decode RLE */
+    xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
+    ret = rle_decode(xbrle_buf, hdr.xh_len, xor_buf, TARGET_PAGE_SIZE);
+    if (ret == -1) {
+        fprintf(stderr, "Failed to load XBRLE page - decode error!\n");
+        goto done;
+    }
+
+    if (ret != TARGET_PAGE_SIZE) {
+        fprintf(stderr, "Failed to load XBRLE page - size %d expected %d!\n",
+            ret, TARGET_PAGE_SIZE);
+        goto done;
+    }
+
+    /* decode XOR delta */
+    prev_page = host;
+    xor_encode(prev_page, prev_page, xor_buf);
+#ifdef DEBUG_ARCH_INIT_CKSUM
+    if (hdr.xh_flags&  ENCODING_FLAG_CKSUM&&
+            hdr.xh_cksum != page_cksum(prev_page)) {
+        fprintf(stderr, "Failed to load XBRLE page - bad checksum!\n");
+        goto done;
+    }
+#endif
+
+    PAGE_LOG(addr, prev_page, "XBRLE page (enc len %d)\n", hdr.xh_len);
+    rc = 0;
+done:
+    qemu_free(xor_buf);
+    qemu_free(xbrle_buf);
+    return rc;
+}
+
  static inline void *host_from_stream_offset(QEMUFile *f,
                                              ram_addr_t offset,
                                              int flags)
@@ -328,16 +865,38 @@ static inline void *host_from_stream_offset(QEMUFile *f,
      return NULL;
  }

+static inline void *host_from_stream_offset_versioned(int version_id,
+        QEMUFile *f, ram_addr_t offset, int flags)
+{
+        void *host;
+        if (version_id == 3) {
+                host = qemu_get_ram_ptr(offset);
+        } else {
+                host = host_from_stream_offset(f, offset, flags);
+        }
+        if (!host) {
+            fprintf(stderr, "Failed to convert RAM address to host"
+                    " for offset 0x%lX!\n", offset);
+            abort();
+        }
+        return host;
+}
+
  int ram_load(QEMUFile *f, void *opaque, int version_id)
  {
      ram_addr_t addr;
-    int flags;
+    int flags, ret = 0;
+    static uint64_t seq_iter;
+
+    seq_iter++;

      if (version_id<  3 || version_id>  4) {
-        return -EINVAL;
+        ret = -EINVAL;
+        goto done;
      }

      do {
+        void *host;
          addr = qemu_get_be64(f);

          flags = addr&  ~TARGET_PAGE_MASK;
@@ -346,7 +905,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
          if (flags&  RAM_SAVE_FLAG_MEM_SIZE) {
              if (version_id == 3) {
                  if (addr != ram_bytes_total()) {
-                    return -EINVAL;
+                    ret = -EINVAL;
+                    goto done;
                  }
              } else {
                  /* Synchronize RAM block list */
@@ -365,8 +925,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)

                      QLIST_FOREACH(block,&ram_list.blocks, next) {
                          if (!strncmp(id, block->idstr, sizeof(id))) {
-                            if (block->length != length)
-                                return -EINVAL;
+                            if (block->length != length) {
+                                ret = -EINVAL;
+                                goto done;
+                            }
                              break;
                          }
                      }
@@ -374,7 +936,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
                      if (!block) {
                          fprintf(stderr, "Unknown ramblock \"%s\", cannot "
                                  "accept migration\n", id);
-                        return -EINVAL;
+                        ret = -EINVAL;
+                        goto done;
                      }

                      total_ram_bytes -= length;
@@ -383,17 +946,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
          }

          if (flags&  RAM_SAVE_FLAG_COMPRESS) {
-            void *host;
              uint8_t ch;

-            if (version_id == 3)
-                host = qemu_get_ram_ptr(addr);
-            else
-                host = host_from_stream_offset(f, addr, flags);
-            if (!host) {
-                return -EINVAL;
-            }
-
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
              ch = qemu_get_byte(f);
              memset(host, ch, TARGET_PAGE_SIZE);
  #ifndef _WIN32
@@ -402,22 +958,31 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
                  qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
              }
  #endif
+            PAGE_LOG(addr, host, "DUP page\n");
          } else if (flags&  RAM_SAVE_FLAG_PAGE) {
-            void *host;
-
-            if (version_id == 3)
-                host = qemu_get_ram_ptr(addr);
-            else
-                host = host_from_stream_offset(f, addr, flags);
-
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
              qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
+            PAGE_LOG(addr, host, "NORMAL page\n");
+        } else if (flags&  RAM_SAVE_FLAG_XBRLE) {
+            host = host_from_stream_offset_versioned(version_id,
+                            f, addr, flags);
+            if (load_xbrle(f, addr, host)<  0) {
+                ret = -EINVAL;
+                goto done;
+            }
          }
+
          if (qemu_file_has_error(f)) {
-            return -EIO;
+            ret = -EIO;
+            goto done;
          }
      } while (!(flags&  RAM_SAVE_FLAG_EOS));

-    return 0;
+done:
+    DPRINTF("Completed load of VM with exit code %d seq iteration %ld\n",
+            ret, seq_iter);
+    return ret;
  }

  void qemu_service_io(void)
diff --git a/block-migration.c b/block-migration.c
index 3e66f49..004fc12 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -689,7 +689,8 @@ static int block_load(QEMUFile *f, void *opaque, int 
version_id)
      return 0;
  }

-static void block_set_params(int blk_enable, int shared_base, void *opaque)
+static void block_set_params(int blk_enable, int shared_base,
+       int use_xbrle, int64_t xbrle_cache_size, void *opaque)
  {
      block_mig_state.blk_enable = blk_enable;
      block_mig_state.shared_base = shared_base;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index e5585ba..e49d5be 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -717,24 +717,27 @@ ETEXI

      {
          .name       = "migrate",
-        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
-        .params     = "[-d] [-b] [-i] uri",
-        .help       = "migrate to URI (using -d to not wait for completion)"
-                     "\n\t\t\t -b for migration without shared storage with"
-                     " full copy of disk\n\t\t\t -i for migration without "
-                     "shared storage with incremental copy of disk "
-                     "(base image shared between src and destination)",
+        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+        .params     = "[-d] [-b] [-i] [-x] uri",
+        .help       = "migrate to URI"
+                      "\n\t -d to not wait for completion"
+                      "\n\t -b for migration without shared storage with"
+                      " full copy of disk"
+                      "\n\t -i for migration without"
+                      " shared storage with incremental copy of disk"
+                      " (base image shared between source and destination)"
+                      "\n\t -x to use XBRLE page delta compression",
          .user_print = monitor_user_noop,
         .mhandler.cmd_new = do_migrate,
      },

-
  STEXI
-@item migrate [-d] [-b] [-i] @var{uri}
+@item migrate [-d] [-b] [-i] [-x] @var{uri}
  @findex migrate
  Migrate to @var{uri} (using -d to not wait for completion).
         -b for migration with full copy of disk
         -i for migration with incremental copy of disk (base image is shared)
+    -x to use XBRLE page delta compression
  ETEXI

      {
@@ -753,10 +756,23 @@ Cancel the current VM migration.
  ETEXI

      {
+        .name       = "migrate_set_cachesize",
+        .args_type  = "value:s",
+        .params     = "value",
+        .help       = "set cache size (in MB) for XBRLE migrations",
+        .mhandler.cmd = do_migrate_set_cachesize,
+    },
+
+STEXI
+@item migrate_set_cachesize @var{value}
+Set cache size (in MB) for XBRLE migrations.
+ETEXI
+
+    {
          .name       = "migrate_set_speed",
          .args_type  = "value:o",
          .params     = "value",
-        .help       = "set maximum speed (in bytes) for migrations. "
+        .help       = "set maximum XBRLE cache size (in bytes) for migrations. 
"
         "Defaults to MB if no size suffix is specified, ie. B/K/M/G/T",
          .user_print = monitor_user_noop,
          .mhandler.cmd_new = do_migrate_set_speed,
diff --git a/hw/hw.h b/hw/hw.h
index 9d2cfc2..de9e5a6 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -239,7 +239,8 @@ static inline void qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
  int64_t qemu_ftell(QEMUFile *f);
  int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);

-typedef void SaveSetParamsHandler(int blk_enable, int shared, void * opaque);
+typedef void SaveSetParamsHandler(int blk_enable, int shared,
+       int use_xbrle, int64_t xbrle_cache_size, void * opaque);
  typedef void SaveStateHandler(QEMUFile *f, void *opaque);
  typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f, int stage,
                                   void *opaque);
diff --git a/migration-exec.c b/migration-exec.c
index 14718dd..fe8254a 100644
--- a/migration-exec.c
+++ b/migration-exec.c
@@ -67,7 +67,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                               int blk,
-                                             int inc)
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size)
  {
      FdMigrationState *s;
      FILE *f;
@@ -99,6 +101,8 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,

      s->mig_state.blk = blk;
      s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

      s->state = MIG_STATE_ACTIVE;
      s->mon = NULL;
diff --git a/migration-fd.c b/migration-fd.c
index 6d14505..4a1ddbd 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -56,7 +56,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
                                             int64_t bandwidth_limit,
                                             int detach,
                                             int blk,
-                                           int inc)
+                        int inc,
+                        int use_xbrle,
+                        int64_t xbrle_cache_size)
  {
      FdMigrationState *s;

@@ -82,6 +84,8 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,

      s->mig_state.blk = blk;
      s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

      s->state = MIG_STATE_ACTIVE;
      s->mon = NULL;
diff --git a/migration-tcp.c b/migration-tcp.c
index b55f419..4ca5bf6 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -81,7 +81,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                              int blk,
-                                            int inc)
+                         int inc,
+                         int use_xbrle,
+                         int64_t xbrle_cache_size)
  {
      struct sockaddr_in addr;
      FdMigrationState *s;
@@ -101,6 +103,8 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,

      s->mig_state.blk = blk;
      s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

      s->state = MIG_STATE_ACTIVE;
      s->mon = NULL;
diff --git a/migration-unix.c b/migration-unix.c
index 57232c0..0813902 100644
--- a/migration-unix.c
+++ b/migration-unix.c
@@ -80,7 +80,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                               int blk,
-                                             int inc)
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size)
  {
      FdMigrationState *s;
      struct sockaddr_un addr;
@@ -100,6 +102,8 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,

      s->mig_state.blk = blk;
      s->mig_state.shared = inc;
+    s->mig_state.use_xbrle = use_xbrle;
+    s->mig_state.xbrle_cache_size = xbrle_cache_size;

      s->state = MIG_STATE_ACTIVE;
      s->mon = NULL;
diff --git a/migration.c b/migration.c
index 9ee8b17..b5b530b 100644
--- a/migration.c
+++ b/migration.c
@@ -34,6 +34,9 @@
  /* Migration speed throttling */
  static uint32_t max_throttle = (32<<  20);

+/* Migration XBRLE cache size */
+static int64_t migrate_cache_size = 0x8000000; /* 128 MB size */
+
  static MigrationState *current_migration;

  int qemu_start_incoming_migration(const char *uri)
@@ -80,6 +83,7 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)
      int detach = qdict_get_try_bool(qdict, "detach", 0);
      int blk = qdict_get_try_bool(qdict, "blk", 0);
      int inc = qdict_get_try_bool(qdict, "inc", 0);
+    int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0);
      const char *uri = qdict_get_str(qdict, "uri");

      if (current_migration&&
@@ -90,17 +94,21 @@ int do_migrate(Monitor *mon, const QDict *qdict, QObject **ret_data)

      if (strstart(uri, "tcp:",&p)) {
          s = tcp_start_outgoing_migration(mon, p, max_throttle, detach,
-                                         blk, inc);
+                                         blk, inc, use_xbrle,
+                                         migrate_cache_size);
  #if !defined(WIN32)
      } else if (strstart(uri, "exec:",&p)) {
          s = exec_start_outgoing_migration(mon, p, max_throttle, detach,
-                                          blk, inc);
+                                          blk, inc, use_xbrle,
+                                          migrate_cache_size);
      } else if (strstart(uri, "unix:",&p)) {
          s = unix_start_outgoing_migration(mon, p, max_throttle, detach,
-                                          blk, inc);
+                                          blk, inc, use_xbrle,
+                                          migrate_cache_size);
      } else if (strstart(uri, "fd:",&p)) {
          s = fd_start_outgoing_migration(mon, p, max_throttle, detach,
-                                        blk, inc);
+                                        blk, inc, use_xbrle,
+                                        migrate_cache_size);
  #endif
      } else {
          monitor_printf(mon, "unknown migration protocol: %s\n", uri);
@@ -341,7 +349,8 @@ void migrate_fd_connect(FdMigrationState *s)

      DPRINTF("beginning savevm\n");
      ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
-                                  s->mig_state.shared);
+                                  s->mig_state.shared, s->mig_state.use_xbrle,
+                                  s->mig_state.xbrle_cache_size);
      if (ret<  0) {
          DPRINTF("failed, %d\n", ret);
          migrate_fd_error(s);
@@ -448,3 +457,17 @@ int migrate_fd_close(void *opaque)
      qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
      return s->close(s);
  }
+
+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict)
+{
+    ssize_t bytes;
+    char *ptr;
+    const char *value = qdict_get_str(qdict, "value");
+
+    bytes = strtod(value,&ptr);
+    if (bytes>  0) {
+        migrate_cache_size = bytes;
+        monitor_printf(mon, "Cache size set to %ld bytes\n", bytes);
+    }
+}
+
diff --git a/migration.h b/migration.h
index d13ed4f..62be2f8 100644
--- a/migration.h
+++ b/migration.h
@@ -32,6 +32,8 @@ struct MigrationState
      void (*release)(MigrationState *s);
      int blk;
      int shared;
+    int use_xbrle;
+    int64_t xbrle_cache_size;
  };

  typedef struct FdMigrationState FdMigrationState;
@@ -76,7 +78,9 @@ MigrationState *exec_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                               int blk,
-                                             int inc);
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size);

  int tcp_start_incoming_migration(const char *host_port);

@@ -85,7 +89,9 @@ MigrationState *tcp_start_outgoing_migration(Monitor *mon,
                                              int64_t bandwidth_limit,
                                              int detach,
                                              int blk,
-                                            int inc);
+                         int inc,
+                         int use_xbrle,
+                         int64_t xbrle_cache_size);

  int unix_start_incoming_migration(const char *path);

@@ -94,7 +100,9 @@ MigrationState *unix_start_outgoing_migration(Monitor *mon,
                                               int64_t bandwidth_limit,
                                               int detach,
                                               int blk,
-                                             int inc);
+                          int inc,
+                          int use_xbrle,
+                          int64_t xbrle_cache_size);

  int fd_start_incoming_migration(const char *path);

@@ -103,7 +111,9 @@ MigrationState *fd_start_outgoing_migration(Monitor *mon,
                                             int64_t bandwidth_limit,
                                             int detach,
                                             int blk,
-                                           int inc);
+                        int inc,
+                        int use_xbrle,
+                        int64_t xbrle_cache_size);

  void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon);

@@ -134,4 +144,9 @@ static inline FdMigrationState *migrate_to_fms(MigrationState *mig_state)
      return container_of(mig_state, FdMigrationState, mig_state);
  }

+void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict);
+
+void arch_set_params(int blk_enable, int shared_base,
+        int use_xbrle, int64_t xbrle_cache_size, void *opaque);
+
  #endif
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 793cf1c..8fbe64b 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -431,13 +431,16 @@ EQMP

      {
          .name       = "migrate",
-        .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
-        .params     = "[-d] [-b] [-i] uri",
-        .help       = "migrate to URI (using -d to not wait for completion)"
-                     "\n\t\t\t -b for migration without shared storage with"
-                     " full copy of disk\n\t\t\t -i for migration without "
-                     "shared storage with incremental copy of disk "
-                     "(base image shared between src and destination)",
+        .args_type  = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
+        .params     = "[-d] [-b] [-i] [-x] uri",
+        .help       = "migrate to URI"
+                      "\n\t -d to not wait for completion"
+                      "\n\t -b for migration without shared storage with"
+                      " full copy of disk"
+                      "\n\t -i for migration without"
+                      " shared storage with incremental copy of disk"
+                      " (base image shared between source and destination)"
+                      "\n\t -x to use XBRLE page delta compression",
          .user_print = monitor_user_noop,
         .mhandler.cmd_new = do_migrate,
      },
@@ -453,6 +456,7 @@ Arguments:
  - "blk": block migration, full disk copy (json-bool, optional)
  - "inc": incremental disk copy (json-bool, optional)
  - "uri": Destination URI (json-string)
+- "xbrle": use XBRLE page delta compression (json-bool, optional)

  Example:

@@ -494,6 +498,31 @@ Example:
  EQMP

      {
+        .name       = "migrate_set_cachesize",
+        .args_type  = "value:s",
+        .params     = "value",
+        .help       = "set cache size (in MB) for xbrle migrations",
+        .mhandler.cmd = do_migrate_set_cachesize,
+    },
+
+SQMP
+migrate_set_cachesize
+---------------------
+
+Set cache size to be used by XBRLE migration
+
+Arguments:
+
+- "value": cache size in bytes, given as a decimal string (json-string)
+
+Example:
+
+->  { "execute": "migrate_set_cachesize", "arguments": { "value": "524288000" } }
+<- { "return": {} }
+
+EQMP
+
+    {
          .name       = "migrate_set_speed",
          .args_type  = "value:f",
          .params     = "value",
diff --git a/savevm.c b/savevm.c
index 4e49765..93b512b 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1141,7 +1141,8 @@ int register_savevm(DeviceState *dev,
                      void *opaque)
  {
      return register_savevm_live(dev, idstr, instance_id, version_id,
-                                NULL, NULL, save_state, load_state, opaque);
+                                arch_set_params, NULL, save_state,
+                                load_state, opaque);
  }

  void unregister_savevm(DeviceState *dev, const char *idstr, void *opaque)
@@ -1428,15 +1429,17 @@ static int vmstate_save(QEMUFile *f, SaveStateEntry *se)
  #define QEMU_VM_SUBSECTION           0x05

  int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-                            int shared)
+                            int shared, int use_xbrle,
+                            int64_t xbrle_cache_size)
  {
      SaveStateEntry *se;

      QTAILQ_FOREACH(se,&savevm_handlers, entry) {
          if(se->set_params == NULL) {
              continue;
-       }
-       se->set_params(blk_enable, shared, se->opaque);
+        }
+        se->set_params(blk_enable, shared, use_xbrle, xbrle_cache_size,
+                se->opaque);
      }

      qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
@@ -1577,7 +1580,7 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f)

      bdrv_flush_all();

-    ret = qemu_savevm_state_begin(mon, f, 0, 0);
+    ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0);
      if (ret<  0)
          goto out;

diff --git a/sysemu.h b/sysemu.h
index b81a70e..15a0664 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -74,7 +74,8 @@ void qemu_announce_self(void);
  void main_loop_wait(int nonblocking);

  int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int blk_enable,
-                            int shared);
+                            int shared, int use_xbrle,
+                            int64_t xbrle_cache_size);
  int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f);
  int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
  void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);







reply via email to

[Prev in Thread] Current Thread [Next in Thread]