[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] XBRLE page delta compression for live migration
From: |
Shribman, Aidan |
Subject: |
Re: [Qemu-devel] [PATCH] XBRLE page delta compression for live migration of large memory apps |
Date: |
Mon, 13 Jun 2011 15:10:37 +0200 |
The proposed patch slipped by with no apparent response - kindly provide
feedback,
Aidan
> -----Original Message-----
> From: address@hidden
> [mailto:address@hidden
> On Behalf Of Shribman, Aidan
> Sent: Sunday, May 22, 2011 3:01 PM
> To: address@hidden
> Subject: [Qemu-devel] [PATCH] XBRLE page delta compression
> for live migration of large memory apps
>
> Subject: [PATCH] XBRLE page delta compression for live
> migration of large memory apps
> From: Aidan Shribman <address@hidden>
>
> By using XBRLE (Xor Based Run-Length-Encoding) we can reduce required
> bandwidth for the transfer of dirty memory pages during live migration
> migrate_set_cachesize <size>
> migrate -x <url>
> Qemu host: Ubuntu 10.10
> Testing: live migration (w and w/o XBRLE) tested successfully.
>
> Signed-off-by: Benoit Hudzia <address@hidden>
> Signed-off-by: Petter Svard <address@hidden>
> Signed-off-by: Aidan Shribman <address@hidden>
>
> ---
>
> arch_init.c | 647
> +++++++++++++++++++++++++++++++++++++++++++++++++----
> block-migration.c | 3 +-
> hmp-commands.hx | 36 +++-
> hw/hw.h | 3 +-
> migration-exec.c | 6 +-
> migration-fd.c | 6 +-
> migration-tcp.c | 6 +-
> migration-unix.c | 6 +-
> migration.c | 33 +++-
> migration.h | 23 ++-
> qmp-commands.hx | 43 +++-
> savevm.c | 13 +-
> sysemu.h | 3 +-
> 13 files changed, 749 insertions(+), 79 deletions(-)
>
> diff --git a/arch_init.c b/arch_init.c
> index 4486925..069cd67 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -27,6 +27,7 @@
> #include <sys/types.h>
> #include <sys/mman.h>
> #endif
> +#include <assert.h>
> #include "config.h"
> #include "monitor.h"
> #include "sysemu.h"
> @@ -41,6 +42,24 @@
> #include "gdbstub.h"
> #include "hw/smbios.h"
>
> +//#define DEBUG_ARCH_INIT
> +#ifdef DEBUG_ARCH_INIT
> +#define DPRINTF(fmt, ...) \
> + do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__);
> } while (0)
> +#else
> +#define DPRINTF(fmt, ...) \
> + do { } while (0)
> +#endif
> +
> +//#define DEBUG_ARCH_INIT_CKSUM
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +#define PAGE_LOG(addr, pdata, fmt, ...) \
> + do { page_log(addr, pdata, fmt, ## __VA_ARGS__); } while (0)
> +#else
> +#define PAGE_LOG(addr, pdata, fmt, ...) \
> + do { } while (0)
> +#endif
> +
> #ifdef TARGET_SPARC
> int graphic_width = 1024;
> int graphic_height = 768;
> @@ -88,6 +107,402 @@ const uint32_t arch_type = QEMU_ARCH;
> #define RAM_SAVE_FLAG_PAGE 0x08
> #define RAM_SAVE_FLAG_EOS 0x10
> #define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBRLE 0x40
> +
> +/***********************************************************/
> +/* Page cache for storing previous pages as basis for XBRLE
> compression */
> +#define CACHE_N_WAY 2 /* 2-way associative cache */
> +
> +typedef struct CacheItem {
> + ram_addr_t it_addr;
> + unsigned long it_age;
> + uint8_t *it_data;
> +} CacheItem;
> +
> +typedef struct CacheBucket {
> + CacheItem bkt_item[CACHE_N_WAY];
> +} CacheBucket;
> +
> +static CacheBucket *page_cache;
> +static int64_t cache_num_buckets;
> +static uint64_t cache_max_item_age;
> +static int64_t cache_num_items;
> +
> +static void cache_init(ssize_t num_buckets);
> +static void cache_fini(void);
> +static int cache_is_cached(ram_addr_t addr);
> +static int cache_get_oldest(CacheBucket *buck);
> +static int cache_get_newest(CacheBucket *buck, ram_addr_t addr);
> +static void cache_insert(ram_addr_t id, uint8_t *pdata);
> +static unsigned long cache_get_cache_pos(ram_addr_t address);
> +static CacheItem *cache_item_get(unsigned long pos, int item);
> +
> +/***********************************************************/
> +/* RAM Migration State */
> +typedef struct ArchMigrationState {
> + int use_xbrle;
> + int64_t xbrle_cache_size;
> +} ArchMigrationState;
> +
> +static ArchMigrationState arch_mig_state;
> +
> +void arch_set_params(int blk_enable, int shared_base, int use_xbrle,
> + int64_t xbrle_cache_size, void *opaque)
> +{
> + arch_mig_state.use_xbrle = use_xbrle;
> + arch_mig_state.xbrle_cache_size = xbrle_cache_size;
> +}
> +
> +/***********************************************************/
> +/* XBRLE (Xor Based Run-Length Encoding) */
> +typedef struct XBRLEHeader {
> + uint8_t xh_flags;
> + uint16_t xh_len;
> + uint32_t xh_cksum;
> +} XBRLEHeader;
> +
> +static int rle_encode(uint8_t *src, int slen, uint8_t *dst,
> int dlen);
> +static int rle_decode(uint8_t *src, int slen, uint8_t *dst,
> int dlen);
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static uint32_t page_cksum(uint8_t *buf);
> +static void page_log(ram_addr_t addr, uint8_t *pdata, const
> char *fmt, ...);
> +#endif
> +
> +/***********************************************************/
> +/* benchmarking */
> +typedef struct BenchInfo {
> + uint64_t normal_pages;
> + uint64_t xbrle_pages;
> + uint64_t xbrle_bytes;
> + uint64_t xbrle_pages_aborted;
> + uint64_t dup_pages;
> + uint64_t iterations;
> +} BenchInfo;
> +
> +static BenchInfo bench;
> +
> +/***********************************************************/
> +/* XBRLE page cache implementation */
> +static CacheItem *cache_item_get(unsigned long pos, int item)
> +{
> + assert(page_cache);
> + return &page_cache[pos].bkt_item[item];
> +}
> +
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static int64_t cache_max_items(void)
> +{
> + return cache_num_buckets * CACHE_N_WAY;
> +}
> +#endif /* DEBUG_ARCH_INIT_CKSUM */
> +
> +static void cache_init(int64_t num_bytes)
> +{
> + int i;
> +
> + cache_num_items = 0;
> + cache_max_item_age = 0;
> + cache_num_buckets = num_bytes / (TARGET_PAGE_SIZE * CACHE_N_WAY);
> + assert(cache_num_buckets);
> + DPRINTF("Setting cache buckets to %ld\n", cache_num_buckets);
> +
> + assert(!page_cache);
> + page_cache = (CacheBucket *)qemu_mallocz((cache_num_buckets) *
> + sizeof(CacheBucket));
> +
> + for (i = 0; i < cache_num_buckets; i++) {
> + int j;
> + for (j = 0; j < CACHE_N_WAY; j++) {
> + CacheItem *it = cache_item_get(i, j);
> + it->it_data = NULL;
> + it->it_age = 0;
> + it->it_addr = -1;
> + }
> + }
> +}
> +
> +static void cache_fini(void)
> +{
> + int i;
> +
> + assert(page_cache);
> +
> + for (i = 0; i < cache_num_buckets; i++) {
> + int j;
> + for (j = 0; j < CACHE_N_WAY; j++) {
> + CacheItem *it = cache_item_get(i, j);
> + qemu_free(it->it_data);
> + it->it_data = 0;
> + }
> + }
> +
> + qemu_free(page_cache);
> + page_cache = NULL;
> +}
> +
> +static unsigned long cache_get_cache_pos(ram_addr_t address)
> +{
> + unsigned long pos;
> +
> + assert(cache_num_buckets);
> + pos = (address/TARGET_PAGE_SIZE) & (cache_num_buckets - 1);
> + return pos;
> +}
> +
> +static int cache_get_newest(CacheBucket *buck, ram_addr_t addr)
> +{
> + unsigned long big = 0;
> + int big_pos = -1;
> + int j;
> +
> + assert(page_cache);
> +
> + for (j = 0; j < CACHE_N_WAY; j++) {
> + CacheItem *it = &buck->bkt_item[j];
> +
> + if (it->it_addr != addr) {
> + continue;
> + }
> +
> + if (!j || it->it_age > big) {
> + big = it->it_age;
> + big_pos = j;
> + }
> + }
> +
> + return big_pos;
> +}
> +
> +static int cache_get_oldest(CacheBucket *buck)
> +{
> + unsigned long small = 0;
> + int small_pos = -1;
> + int j;
> +
> + assert(page_cache);
> +
> + for (j = 0; j < CACHE_N_WAY; j++) {
> + CacheItem *it = &buck->bkt_item[j];
> +
> + if (!j || it->it_age < small) {
> + small = it->it_age;
> + small_pos = j;
> + }
> + }
> +
> + return small_pos;
> +}
> +
> +static int cache_is_cached(ram_addr_t addr)
> +{
> + unsigned long pos = cache_get_cache_pos(addr);
> +
> + assert(page_cache);
> + CacheBucket *bucket = &page_cache[pos];
> + return cache_get_newest(bucket, addr);
> +}
> +
> +static void cache_insert(unsigned long addr, uint8_t *pdata)
> +{
> + unsigned long pos;
> + int slot = -1;
> + CacheBucket *bucket;
> +
> + pos = cache_get_cache_pos(addr);
> + assert(page_cache);
> + bucket = &page_cache[pos];
> + slot = cache_get_oldest(bucket); /* evict LRU */
> +
> + /* actual update of entry */
> + CacheItem *it = cache_item_get(pos, slot);
> + if (!it->it_data) {
> + cache_num_items++;
> + }
> + qemu_free(it->it_data);
> + it->it_data = pdata;
> + it->it_age = ++cache_max_item_age;
> + it->it_addr = addr;
> +}
> +
> +/* XBRLE (Xor Based Run-Length Encoding) */
> +static int rle_encode(uint8_t *src, int slen, uint8_t *dst, int dlen)
> +{
> + int d = 0, ch_run = 0, i;
> + uint8_t prev, ch;
> +
> + for (i = 0; i <= slen; i++) {
> + if (i != slen) {
> + ch = src[i];
> + }
> +
> + if (!i || (i != slen && ch == prev && ch_run < 255)) {
> + ch_run++;
> + } else {
> + if (d+2 > dlen)
> + return -1;
> + *dst++ = ch_run;
> + *dst++ = prev;
> + d += 2;
> + ch_run = 1;
> + }
> +
> + prev = ch;
> + }
> + return d;
> +}
> +
> +static int rle_decode(uint8_t *src, int slen, uint8_t *dst, int dlen)
> +{
> + int d = 0, s;
> +
> + for (s = 0; s < slen-1; s += 2) {
> + uint8_t ch_run = src[s];
> + uint8_t ch = src[s+1];
> + while (ch_run--) {
> + if (d == dlen) {
> + return -1;
> + }
> + dst[d] = ch;
> + d++;
> + }
> + }
> + return d;
> +}
> +
> +#define PAGE_SAMPLE_PERCENT 0.02
> +#define PAGE_SAMPLE_SIZE (TARGET_PAGE_SIZE * PAGE_SAMPLE_PERCENT)
> +#define BYTES_CHANGED_PERCENT 0.30
> +
> +static int is_page_good_for_xbrle(uint8_t *old, uint8_t *new)
> +{
> + int i, bytes_changed = 0;
> +
> + srand(time(NULL)+getpid()+getpid()*987654+rand());
> +
> + for (i = 0; i < PAGE_SAMPLE_SIZE; i++) {
> + unsigned long pos = (int) (rand() * TARGET_PAGE_SIZE
> / (RAND_MAX+1.0));
> +
> + if (old[pos] != new[pos]) {
> + bytes_changed++;
> + }
> + }
> +
> + return (((float) bytes_changed) / PAGE_SAMPLE_SIZE) <
> BYTES_CHANGED_PERCENT;
> +}
> +
> +static void xor_encode(uint8_t *dst, uint8_t *src1, uint8_t *src2)
> +{
> + int i;
> +
> + for (i = 0; i < TARGET_PAGE_SIZE; i++) {
> + dst[i] = src1[i] ^ src2[i];
> + }
> +}
> +
> +static void save_block_hdr(QEMUFile *f,
> + RAMBlock *block, ram_addr_t offset, int cont, int flag)
> +{
> + qemu_put_be64(f, offset | cont | flag);
> + if (!cont) {
> + qemu_put_byte(f, strlen(block->idstr));
> + qemu_put_buffer(f, (uint8_t *)block->idstr,
> + strlen(block->idstr));
> + }
> +}
> +
> +#define ENCODING_FLAG_XBRLE 0x1
> +#define ENCODING_FLAG_CKSUM 0x2
> +
> +static int save_xbrle_page(QEMUFile *f, uint8_t *current_data,
> + ram_addr_t current_addr, RAMBlock *block, ram_addr_t
> offset, int cont)
> +{
> + int cache_location = -1, slot = -1, encoded_len = 0,
> bytes_sent = 0;
> + XBRLEHeader hdr = {0};
> + CacheItem *it;
> + uint8_t *xor_buf = NULL, *xbrle_buf = NULL;
> +
> + /* get location */
> + slot = cache_is_cached(current_addr);
> + if (slot == -1) {
> + goto done;
> + }
> + cache_location = cache_get_cache_pos(current_addr);
> +
> + /* abort if page changed too much */
> + it = cache_item_get(cache_location, slot);
> + if (!is_page_good_for_xbrle(it->it_data, current_data)) {
> + DPRINTF("Page changed too much! Aborting XBRLE.\n");
> + bench.xbrle_pages_aborted++;
> + goto done;
> + }
> +
> + /* XOR encoding */
> + xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> + xor_encode(xor_buf, it->it_data, current_data);
> +
> + /* XBRLE (XOR+RLE) encoding */
> + xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> + encoded_len = rle_encode(xor_buf, TARGET_PAGE_SIZE, xbrle_buf,
> + TARGET_PAGE_SIZE);
> +
> + if (encoded_len < 0) {
> + DPRINTF("XBRLE encoding overflow - sending uncompressed\n");
> + goto done;
> + }
> +
> + hdr.xh_len = encoded_len;
> + hdr.xh_flags |= ENCODING_FLAG_XBRLE;
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> + hdr.xh_cksum = page_cksum(current_data);
> + hdr.xh_flags |= ENCODING_FLAG_CKSUM;
> +#endif
> +
> + /* Send XBRLE compressed page */
> + save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBRLE);
> + qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
> + qemu_put_buffer(f, xbrle_buf, encoded_len);
> + PAGE_LOG(current_addr, current_data, "XBRLE page (enc len %d)\n",
> + encoded_len);
> + bench.xbrle_pages++;
> + bytes_sent = encoded_len + sizeof(hdr);
> + bench.xbrle_bytes += bytes_sent;
> +
> +done:
> + qemu_free(xor_buf);
> + qemu_free(xbrle_buf);
> + return bytes_sent;
> +}
> +
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> +static uint32_t page_cksum(uint8_t *buf)
> +{
> + uint32_t res = 0;
> + int i;
> +
> + for (i = 0; i < TARGET_PAGE_SIZE; i++) {
> + res += buf[i];
> + }
> +
> + return res;
> +}
> +
> +static void page_log(ram_addr_t addr, uint8_t *pdata, const
> char *fmt, ...)
> +{
> + va_list arg_ptr;
> + static FILE *fp;
> + static uint32_t page_seq;
> +
> + va_start(arg_ptr, fmt);
> + if (!fp) {
> + fp = fopen("mig.log", "w");
> + }
> + page_seq++;
> + fprintf(fp, "[seq %d addr 0x%lX cksum 0x%X] ", page_seq,
> + (unsigned long) addr, page_cksum(pdata));
> + vfprintf(fp, fmt, arg_ptr);
> + va_end(arg_ptr);
> +}
> +#endif /* DEBUG_ARCH_INIT_CKSUM */
>
> static int is_dup_page(uint8_t *page, uint8_t ch)
> {
> @@ -107,7 +522,7 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
> static RAMBlock *last_block;
> static ram_addr_t last_offset;
>
> -static int ram_save_block(QEMUFile *f)
> +static int ram_save_block(QEMUFile *f, int stage)
> {
> RAMBlock *block = last_block;
> ram_addr_t offset = last_offset;
> @@ -128,28 +543,32 @@ static int ram_save_block(QEMUFile *f)
> current_addr +
> TARGET_PAGE_SIZE,
> MIGRATION_DIRTY_FLAG);
>
> - p = block->host + offset;
> + p = qemu_mallocz(TARGET_PAGE_SIZE);
> + memcpy(p, block->host + offset, TARGET_PAGE_SIZE);
>
> if (is_dup_page(p, *p)) {
> - qemu_put_be64(f, offset | cont |
> RAM_SAVE_FLAG_COMPRESS);
> - if (!cont) {
> - qemu_put_byte(f, strlen(block->idstr));
> - qemu_put_buffer(f, (uint8_t *)block->idstr,
> - strlen(block->idstr));
> - }
> + save_block_hdr(f, block, offset, cont,
> RAM_SAVE_FLAG_COMPRESS);
> qemu_put_byte(f, *p);
> bytes_sent = 1;
> - } else {
> - qemu_put_be64(f, offset | cont | RAM_SAVE_FLAG_PAGE);
> - if (!cont) {
> - qemu_put_byte(f, strlen(block->idstr));
> - qemu_put_buffer(f, (uint8_t *)block->idstr,
> - strlen(block->idstr));
> + bench.dup_pages++;
> + PAGE_LOG(current_addr, p, "DUP page\n");
> + } else if (stage == 2 && arch_mig_state.use_xbrle) {
> + bytes_sent = save_xbrle_page(f, p,
> current_addr, block,
> + offset, cont);
> + if (bytes_sent) {
> + cache_insert(current_addr, p);
> }
> + }
> + if (!bytes_sent) {
> + save_block_hdr(f, block, offset, cont,
> RAM_SAVE_FLAG_PAGE);
> qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
> bytes_sent = TARGET_PAGE_SIZE;
> + bench.normal_pages++;
> + PAGE_LOG(current_addr, p, "NORMAL page\n");
> + if (arch_mig_state.use_xbrle) {
> + cache_insert(current_addr, p);
> + }
> }
> -
> break;
> }
>
> @@ -212,6 +631,55 @@ uint64_t ram_bytes_total(void)
> return total;
> }
>
> +#ifdef DEBUG_ARCH_INIT
> +static void dump_percentage(const char *label, unsigned long
> absolute,
> + unsigned long total)
> +{
> + printf("%s: %ld (%0.2f%%)\n", label, absolute,
> + (total ? (100.0 * absolute / total) : 0));
> +}
> +
> +static void dump_migration_statistics(void)
> +{
> + unsigned long normal_bytes = bench.normal_pages *
> TARGET_PAGE_SIZE;
> + unsigned long total_pages = bench.normal_pages +
> bench.xbrle_pages
> + + bench.dup_pages;
> + unsigned long total_bytes = normal_bytes + bench.xbrle_bytes
> + + bench.dup_pages;
> +
> + printf("\n");
> +
> printf("=====================================================\n");
> + printf("Save VM Memory Statistics (SUCCESS or FAILURE):\n");
> + printf("Iterations: %ld\n", bench.iterations);
> +
> + dump_percentage("Normal pages", bench.normal_pages, total_pages);
> + dump_percentage("Normal bytes", normal_bytes, total_bytes);
> +
> + dump_percentage("Dup pages", bench.dup_pages, total_pages);
> + dump_percentage("Dup bytes", bench.dup_pages, total_bytes);
> +
> + if (arch_mig_state.use_xbrle) {
> + dump_percentage("XBRLE pages", bench.xbrle_pages,
> total_pages);
> + dump_percentage("XBRLE bytes", bench.xbrle_bytes,
> total_bytes);
> + dump_percentage("Aborted XBRLE pages from XBRLE",
> + bench.xbrle_pages_aborted,
> + bench.xbrle_pages + bench.xbrle_pages_aborted);
> + }
> +
> + dump_percentage("Total pages", total_pages, total_pages);
> + dump_percentage("Total bytes", total_bytes, total_bytes);
> +
> + if (arch_mig_state.use_xbrle) {
> + printf("Cache number of inserts: %ld\n", cache_max_item_age);
> + printf("Cache max items: %ld\n", cache_max_items());
> + dump_percentage("Cache number of items", cache_num_items,
> + cache_max_items());
> + }
> +
> +
> printf("=====================================================\n");
> +}
> +#endif /* DEBUG_ARCH_INIT */
> +
> int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
> {
> ram_addr_t addr;
> @@ -235,6 +703,10 @@ int ram_save_live(Monitor *mon, QEMUFile
> *f, int stage, void *opaque)
> last_block = NULL;
> last_offset = 0;
>
> + if (arch_mig_state.use_xbrle) {
> + cache_init(arch_mig_state.xbrle_cache_size);
> + }
> +
> /* Make sure all dirty bits are set */
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> for (addr = block->offset; addr < block->offset
> + block->length;
> @@ -264,8 +736,9 @@ int ram_save_live(Monitor *mon, QEMUFile
> *f, int stage, void *opaque)
> while (!qemu_file_rate_limit(f)) {
> int bytes_sent;
>
> - bytes_sent = ram_save_block(f);
> + bytes_sent = ram_save_block(f, stage);
> bytes_transferred += bytes_sent;
> + bench.iterations++;
> if (bytes_sent == 0) { /* no more blocks */
> break;
> }
> @@ -285,19 +758,83 @@ int ram_save_live(Monitor *mon,
> QEMUFile *f, int stage, void *opaque)
> int bytes_sent;
>
> /* flush all remaining blocks regardless of rate limiting */
> - while ((bytes_sent = ram_save_block(f)) != 0) {
> + while ((bytes_sent = ram_save_block(f, stage))) {
> bytes_transferred += bytes_sent;
> }
> cpu_physical_memory_set_dirty_tracking(0);
> + if (arch_mig_state.use_xbrle) {
> + cache_fini();
> +#ifdef DEBUG_ARCH_INIT
> + dump_migration_statistics();
> +#endif
> + }
> }
>
> qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>
> expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>
> + DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n",
> expected_time,
> + migrate_max_downtime());
> +
> return (stage == 2) && (expected_time <= migrate_max_downtime());
> }
>
> +static int load_xbrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> + int ret, rc = -1;
> + uint8_t *prev_page, *xor_buf, *xbrle_buf;
> + XBRLEHeader hdr = {0};
> +
> + /* extract RLE header */
> + qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));
> + if (!(hdr.xh_flags & ENCODING_FLAG_XBRLE)) {
> + fprintf(stderr, "Failed to load XBRLE page - wrong
> compression!\n");
> + goto done;
> + }
> +
> + if (hdr.xh_len > TARGET_PAGE_SIZE) {
> + fprintf(stderr, "Failed to load XBRLE page - len
> overflow!\n");
> + goto done;
> + }
> +
> + /* load data and decode */
> + xbrle_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> + qemu_get_buffer(f, xbrle_buf, hdr.xh_len);
> +
> + /* decode RLE */
> + xor_buf = (uint8_t *) qemu_mallocz(TARGET_PAGE_SIZE);
> + ret = rle_decode(xbrle_buf, hdr.xh_len, xor_buf,
> TARGET_PAGE_SIZE);
> + if (ret == -1) {
> + fprintf(stderr, "Failed to load XBRLE page - decode
> error!\n");
> + goto done;
> + }
> +
> + if (ret != TARGET_PAGE_SIZE) {
> + fprintf(stderr, "Failed to load XBRLE page - size %d
> expected %d!\n",
> + ret, TARGET_PAGE_SIZE);
> + goto done;
> + }
> +
> + /* decode XOR delta */
> + prev_page = host;
> + xor_encode(prev_page, prev_page, xor_buf);
> +#ifdef DEBUG_ARCH_INIT_CKSUM
> + if (hdr.xh_flags & ENCODING_FLAG_CKSUM &&
> + hdr.xh_cksum != page_cksum(prev_page)) {
> + fprintf(stderr, "Failed to load XBRLE page - bad
> checksum!\n");
> + goto done;
> + }
> +#endif
> +
> + PAGE_LOG(addr, prev_page, "XBRLE page (enc len %d)\n",
> hdr.xh_len);
> + rc = 0;
> +done:
> + qemu_free(xor_buf);
> + qemu_free(xbrle_buf);
> + return rc;
> +}
> +
> static inline void *host_from_stream_offset(QEMUFile *f,
> ram_addr_t offset,
> int flags)
> @@ -328,16 +865,38 @@ static inline void
> *host_from_stream_offset(QEMUFile *f,
> return NULL;
> }
>
> +static inline void *host_from_stream_offset_versioned(int version_id,
> + QEMUFile *f, ram_addr_t offset, int flags)
> +{
> + void *host;
> + if (version_id == 3) {
> + host = qemu_get_ram_ptr(offset);
> + } else {
> + host = host_from_stream_offset(f, offset, flags);
> + }
> + if (!host) {
> + fprintf(stderr, "Failed to convert RAM address to host"
> + " for offset 0x%lX!\n", offset);
> + abort();
> + }
> + return host;
> +}
> +
> int ram_load(QEMUFile *f, void *opaque, int version_id)
> {
> ram_addr_t addr;
> - int flags;
> + int flags, ret = 0;
> + static uint64_t seq_iter;
> +
> + seq_iter++;
>
> if (version_id < 3 || version_id > 4) {
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
>
> do {
> + void *host;
> addr = qemu_get_be64(f);
>
> flags = addr & ~TARGET_PAGE_MASK;
> @@ -346,7 +905,8 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
> if (flags & RAM_SAVE_FLAG_MEM_SIZE) {
> if (version_id == 3) {
> if (addr != ram_bytes_total()) {
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
> } else {
> /* Synchronize RAM block list */
> @@ -365,8 +925,10 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
>
> QLIST_FOREACH(block, &ram_list.blocks, next) {
> if (!strncmp(id, block->idstr, sizeof(id))) {
> - if (block->length != length)
> - return -EINVAL;
> + if (block->length != length) {
> + ret = -EINVAL;
> + goto done;
> + }
> break;
> }
> }
> @@ -374,7 +936,8 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
> if (!block) {
> fprintf(stderr, "Unknown ramblock
> \"%s\", cannot "
> "accept migration\n", id);
> - return -EINVAL;
> + ret = -EINVAL;
> + goto done;
> }
>
> total_ram_bytes -= length;
> @@ -383,17 +946,10 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
> }
>
> if (flags & RAM_SAVE_FLAG_COMPRESS) {
> - void *host;
> uint8_t ch;
>
> - if (version_id == 3)
> - host = qemu_get_ram_ptr(addr);
> - else
> - host = host_from_stream_offset(f, addr, flags);
> - if (!host) {
> - return -EINVAL;
> - }
> -
> + host = host_from_stream_offset_versioned(version_id,
> + f, addr, flags);
> ch = qemu_get_byte(f);
> memset(host, ch, TARGET_PAGE_SIZE);
> #ifndef _WIN32
> @@ -402,22 +958,31 @@ int ram_load(QEMUFile *f, void *opaque,
> int version_id)
> qemu_madvise(host, TARGET_PAGE_SIZE,
> QEMU_MADV_DONTNEED);
> }
> #endif
> + PAGE_LOG(addr, host, "DUP page\n");
> } else if (flags & RAM_SAVE_FLAG_PAGE) {
> - void *host;
> -
> - if (version_id == 3)
> - host = qemu_get_ram_ptr(addr);
> - else
> - host = host_from_stream_offset(f, addr, flags);
> -
> + host = host_from_stream_offset_versioned(version_id,
> + f, addr, flags);
> qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> + PAGE_LOG(addr, host, "NORMAL page\n");
> + } else if (flags & RAM_SAVE_FLAG_XBRLE) {
> + host = host_from_stream_offset_versioned(version_id,
> + f, addr, flags);
> + if (load_xbrle(f, addr, host) < 0) {
> + ret = -EINVAL;
> + goto done;
> + }
> }
> +
> if (qemu_file_has_error(f)) {
> - return -EIO;
> + ret = -EIO;
> + goto done;
> }
> } while (!(flags & RAM_SAVE_FLAG_EOS));
>
> - return 0;
> +done:
> + DPRINTF("Completed load of VM with exit code %d seq
> iteration %ld\n",
> + ret, seq_iter);
> + return ret;
> }
>
> void qemu_service_io(void)
> diff --git a/block-migration.c b/block-migration.c
> index 3e66f49..004fc12 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -689,7 +689,8 @@ static int block_load(QEMUFile *f, void
> *opaque, int version_id)
> return 0;
> }
>
> -static void block_set_params(int blk_enable, int
> shared_base, void *opaque)
> +static void block_set_params(int blk_enable, int shared_base,
> + int use_xbrle, int64_t xbrle_cache_size, void *opaque)
> {
> block_mig_state.blk_enable = blk_enable;
> block_mig_state.shared_base = shared_base;
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index e5585ba..e49d5be 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -717,24 +717,27 @@ ETEXI
>
> {
> .name = "migrate",
> - .args_type = "detach:-d,blk:-b,inc:-i,uri:s",
> - .params = "[-d] [-b] [-i] uri",
> - .help = "migrate to URI (using -d to not wait
> for completion)"
> - "\n\t\t\t -b for migration without
> shared storage with"
> - " full copy of disk\n\t\t\t -i for
> migration without "
> - "shared storage with incremental copy of disk "
> - "(base image shared between src and
> destination)",
> + .args_type = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
> + .params = "[-d] [-b] [-i] [-x] uri",
> + .help = "migrate to URI"
> + "\n\t -d to not wait for completion"
> + "\n\t -b for migration without shared
> storage with"
> + " full copy of disk"
> + "\n\t -i for migration without"
> + " shared storage with incremental copy of disk"
> + " (base image shared between source
> and destination)"
> + "\n\t -x to use XBRLE page delta compression",
> .user_print = monitor_user_noop,
> .mhandler.cmd_new = do_migrate,
> },
>
> -
> STEXI
> address@hidden migrate [-d] [-b] [-i] @var{uri}
> address@hidden migrate [-d] [-b] [-i] [-x] @var{uri}
> @findex migrate
> Migrate to @var{uri} (using -d to not wait for completion).
> -b for migration with full copy of disk
> -i for migration with incremental copy of disk (base
> image is shared)
> + -x to use XBRLE page delta compression
> ETEXI
>
> {
> @@ -753,10 +756,23 @@ Cancel the current VM migration.
> ETEXI
>
> {
> + .name = "migrate_set_cachesize",
> + .args_type = "value:s",
> + .params = "value",
> + .help = "set cache size (in bytes) for XBRLE migrations",
> + .mhandler.cmd = do_migrate_set_cachesize,
> + },
> +
> +STEXI
> address@hidden migrate_set_cachesize @var{value}
> +Set cache size (in bytes) for XBRLE migrations.
> +ETEXI
> +
> + {
> .name = "migrate_set_speed",
> .args_type = "value:o",
> .params = "value",
> - .help = "set maximum speed (in bytes) for migrations. "
> + .help = "set maximum speed (in
> bytes) for migrations. "
> "Defaults to MB if no size suffix is specified, ie.
> B/K/M/G/T",
> .user_print = monitor_user_noop,
> .mhandler.cmd_new = do_migrate_set_speed,
> diff --git a/hw/hw.h b/hw/hw.h
> index 9d2cfc2..de9e5a6 100644
> --- a/hw/hw.h
> +++ b/hw/hw.h
> @@ -239,7 +239,8 @@ static inline void
> qemu_get_sbe64s(QEMUFile *f, int64_t *pv)
> int64_t qemu_ftell(QEMUFile *f);
> int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
>
> -typedef void SaveSetParamsHandler(int blk_enable, int
> shared, void * opaque);
> +typedef void SaveSetParamsHandler(int blk_enable, int shared,
> + int use_xbrle, int64_t xbrle_cache_size, void * opaque);
> typedef void SaveStateHandler(QEMUFile *f, void *opaque);
> typedef int SaveLiveStateHandler(Monitor *mon, QEMUFile *f,
> int stage,
> void *opaque);
> diff --git a/migration-exec.c b/migration-exec.c
> index 14718dd..fe8254a 100644
> --- a/migration-exec.c
> +++ b/migration-exec.c
> @@ -67,7 +67,9 @@ MigrationState
> *exec_start_outgoing_migration(Monitor *mon,
> int64_t bandwidth_limit,
> int detach,
> int blk,
> - int inc)
> + int inc,
> + int use_xbrle,
> + int64_t xbrle_cache_size)
> {
> FdMigrationState *s;
> FILE *f;
> @@ -99,6 +101,8 @@ MigrationState
> *exec_start_outgoing_migration(Monitor *mon,
>
> s->mig_state.blk = blk;
> s->mig_state.shared = inc;
> + s->mig_state.use_xbrle = use_xbrle;
> + s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
> s->state = MIG_STATE_ACTIVE;
> s->mon = NULL;
> diff --git a/migration-fd.c b/migration-fd.c
> index 6d14505..4a1ddbd 100644
> --- a/migration-fd.c
> +++ b/migration-fd.c
> @@ -56,7 +56,9 @@ MigrationState
> *fd_start_outgoing_migration(Monitor *mon,
> int64_t bandwidth_limit,
> int detach,
> int blk,
> - int inc)
> + int inc,
> + int use_xbrle,
> + int64_t xbrle_cache_size)
> {
> FdMigrationState *s;
>
> @@ -82,6 +84,8 @@ MigrationState
> *fd_start_outgoing_migration(Monitor *mon,
>
> s->mig_state.blk = blk;
> s->mig_state.shared = inc;
> + s->mig_state.use_xbrle = use_xbrle;
> + s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
> s->state = MIG_STATE_ACTIVE;
> s->mon = NULL;
> diff --git a/migration-tcp.c b/migration-tcp.c
> index b55f419..4ca5bf6 100644
> --- a/migration-tcp.c
> +++ b/migration-tcp.c
> @@ -81,7 +81,9 @@ MigrationState
> *tcp_start_outgoing_migration(Monitor *mon,
> int64_t bandwidth_limit,
> int detach,
> int blk,
> - int inc)
> + int inc,
> + int use_xbrle,
> + int64_t xbrle_cache_size)
> {
> struct sockaddr_in addr;
> FdMigrationState *s;
> @@ -101,6 +103,8 @@ MigrationState
> *tcp_start_outgoing_migration(Monitor *mon,
>
> s->mig_state.blk = blk;
> s->mig_state.shared = inc;
> + s->mig_state.use_xbrle = use_xbrle;
> + s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
> s->state = MIG_STATE_ACTIVE;
> s->mon = NULL;
> diff --git a/migration-unix.c b/migration-unix.c
> index 57232c0..0813902 100644
> --- a/migration-unix.c
> +++ b/migration-unix.c
> @@ -80,7 +80,9 @@ MigrationState
> *unix_start_outgoing_migration(Monitor *mon,
> int64_t bandwidth_limit,
> int detach,
> int blk,
> - int inc)
> + int inc,
> + int use_xbrle,
> + int64_t xbrle_cache_size)
> {
> FdMigrationState *s;
> struct sockaddr_un addr;
> @@ -100,6 +102,8 @@ MigrationState
> *unix_start_outgoing_migration(Monitor *mon,
>
> s->mig_state.blk = blk;
> s->mig_state.shared = inc;
> + s->mig_state.use_xbrle = use_xbrle;
> + s->mig_state.xbrle_cache_size = xbrle_cache_size;
>
> s->state = MIG_STATE_ACTIVE;
> s->mon = NULL;
> diff --git a/migration.c b/migration.c
> index 9ee8b17..b5b530b 100644
> --- a/migration.c
> +++ b/migration.c
> @@ -34,6 +34,9 @@
> /* Migration speed throttling */
> static uint32_t max_throttle = (32 << 20);
>
> +/* Migration XBRLE cache size */
> +static int64_t migrate_cache_size = 0x8000000; /* 128 MB */
> +
> static MigrationState *current_migration;
>
> int qemu_start_incoming_migration(const char *uri)
> @@ -80,6 +83,7 @@ int do_migrate(Monitor *mon, const QDict
> *qdict, QObject **ret_data)
> int detach = qdict_get_try_bool(qdict, "detach", 0);
> int blk = qdict_get_try_bool(qdict, "blk", 0);
> int inc = qdict_get_try_bool(qdict, "inc", 0);
> + int use_xbrle = qdict_get_try_bool(qdict, "xbrle", 0);
> const char *uri = qdict_get_str(qdict, "uri");
>
> if (current_migration &&
> @@ -90,17 +94,21 @@ int do_migrate(Monitor *mon, const QDict
> *qdict, QObject **ret_data)
>
> if (strstart(uri, "tcp:", &p)) {
> s = tcp_start_outgoing_migration(mon, p,
> max_throttle, detach,
> - blk, inc);
> + blk, inc, use_xbrle,
> + migrate_cache_size);
> #if !defined(WIN32)
> } else if (strstart(uri, "exec:", &p)) {
> s = exec_start_outgoing_migration(mon, p,
> max_throttle, detach,
> - blk, inc);
> + blk, inc, use_xbrle,
> + migrate_cache_size);
> } else if (strstart(uri, "unix:", &p)) {
> s = unix_start_outgoing_migration(mon, p,
> max_throttle, detach,
> - blk, inc);
> + blk, inc, use_xbrle,
> + migrate_cache_size);
> } else if (strstart(uri, "fd:", &p)) {
> s = fd_start_outgoing_migration(mon, p, max_throttle, detach,
> - blk, inc);
> + blk, inc, use_xbrle,
> + migrate_cache_size);
> #endif
> } else {
> monitor_printf(mon, "unknown migration protocol: %s\n", uri);
> @@ -341,7 +349,8 @@ void migrate_fd_connect(FdMigrationState *s)
>
> DPRINTF("beginning savevm\n");
> ret = qemu_savevm_state_begin(s->mon, s->file, s->mig_state.blk,
> - s->mig_state.shared);
> + s->mig_state.shared,
> s->mig_state.use_xbrle,
> + s->mig_state.xbrle_cache_size);
> if (ret < 0) {
> DPRINTF("failed, %d\n", ret);
> migrate_fd_error(s);
> @@ -448,3 +457,17 @@ int migrate_fd_close(void *opaque)
> qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
> return s->close(s);
> }
> +
> +void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict)
> +{
> + ssize_t bytes;
> + char *ptr;
> + const char *value = qdict_get_str(qdict, "value");
> +
> + bytes = strtod(value, &ptr);
> + if (bytes > 0) {
> + migrate_cache_size = bytes;
> + monitor_printf(mon, "Cache size set to %ld bytes\n", bytes);
> + }
> +}
> +
> diff --git a/migration.h b/migration.h
> index d13ed4f..62be2f8 100644
> --- a/migration.h
> +++ b/migration.h
> @@ -32,6 +32,8 @@ struct MigrationState
> void (*release)(MigrationState *s);
> int blk;
> int shared;
> + int use_xbrle;
> + int64_t xbrle_cache_size;
> };
>
> typedef struct FdMigrationState FdMigrationState;
> @@ -76,7 +78,9 @@ MigrationState
> *exec_start_outgoing_migration(Monitor *mon,
> int64_t bandwidth_limit,
> int detach,
> int blk,
> - int inc);
> + int inc,
> + int use_xbrle,
> + int64_t xbrle_cache_size);
>
> int tcp_start_incoming_migration(const char *host_port);
>
> @@ -85,7 +89,9 @@ MigrationState
> *tcp_start_outgoing_migration(Monitor *mon,
> int64_t bandwidth_limit,
> int detach,
> int blk,
> - int inc);
> + int inc,
> + int use_xbrle,
> + int64_t xbrle_cache_size);
>
> int unix_start_incoming_migration(const char *path);
>
> @@ -94,7 +100,9 @@ MigrationState
> *unix_start_outgoing_migration(Monitor *mon,
> int64_t bandwidth_limit,
> int detach,
> int blk,
> - int inc);
> + int inc,
> + int use_xbrle,
> + int64_t xbrle_cache_size);
>
> int fd_start_incoming_migration(const char *path);
>
> @@ -103,7 +111,9 @@ MigrationState
> *fd_start_outgoing_migration(Monitor *mon,
> int64_t bandwidth_limit,
> int detach,
> int blk,
> - int inc);
> + int inc,
> + int use_xbrle,
> + int64_t xbrle_cache_size);
>
> void migrate_fd_monitor_suspend(FdMigrationState *s, Monitor *mon);
>
> @@ -134,4 +144,9 @@ static inline FdMigrationState
> *migrate_to_fms(MigrationState *mig_state)
> return container_of(mig_state, FdMigrationState, mig_state);
> }
>
> +void do_migrate_set_cachesize(Monitor *mon, const QDict *qdict);
> +
> +void arch_set_params(int blk_enable, int shared_base,
> + int use_xbrle, int64_t xbrle_cache_size, void *opaque);
> +
> #endif
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 793cf1c..8fbe64b 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -431,13 +431,16 @@ EQMP
>
> {
> .name = "migrate",
> - .args_type = "detach:-d,blk:-b,inc:-i,uri:s",
> - .params = "[-d] [-b] [-i] uri",
> - .help = "migrate to URI (using -d to not wait
> for completion)"
> - "\n\t\t\t -b for migration without
> shared storage with"
> - " full copy of disk\n\t\t\t -i for
> migration without "
> - "shared storage with incremental copy of disk "
> - "(base image shared between src and
> destination)",
> + .args_type = "detach:-d,blk:-b,inc:-i,xbrle:-x,uri:s",
> + .params = "[-d] [-b] [-i] [-x] uri",
> + .help = "migrate to URI"
> + "\n\t -d to not wait for completion"
> + "\n\t -b for migration without shared
> storage with"
> + " full copy of disk"
> + "\n\t -i for migration without"
> + " shared storage with incremental copy of disk"
> + " (base image shared between source
> and destination)"
> + "\n\t -x to use XBRLE page delta compression",
> .user_print = monitor_user_noop,
> .mhandler.cmd_new = do_migrate,
> },
> @@ -453,6 +456,7 @@ Arguments:
> - "blk": block migration, full disk copy (json-bool, optional)
> - "inc": incremental disk copy (json-bool, optional)
> - "uri": Destination URI (json-string)
> +- "xbrle": use XBRLE page delta compression (json-bool, optional)
>
> Example:
>
> @@ -494,6 +498,31 @@ Example:
> EQMP
>
> {
> + .name = "migrate_set_cachesize",
> + .args_type = "value:s",
> + .params = "value",
> +        .help       = "set cache size (in bytes) for XBRLE migrations",
> + .mhandler.cmd = do_migrate_set_cachesize,
> + },
> +
> +SQMP
> +migrate_set_cachesize
> +---------------------
> +
> +Set cache size to be used by XBRLE migration
> +
> +Arguments:
> +
> +- "value": cache size in bytes (json-number)
> +
> +Example:
> +
> +-> { "execute": "migrate_set_cachesize", "arguments": {
> "value": 536870912 } }
> +<- { "return": {} }
> +
> +EQMP
> +
> + {
> .name = "migrate_set_speed",
> .args_type = "value:f",
> .params = "value",
> diff --git a/savevm.c b/savevm.c
> index 4e49765..93b512b 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -1141,7 +1141,8 @@ int register_savevm(DeviceState *dev,
> void *opaque)
> {
> return register_savevm_live(dev, idstr, instance_id, version_id,
> - NULL, NULL, save_state,
> load_state, opaque);
> + arch_set_params, NULL, save_state,
> + load_state, opaque);
> }
>
> void unregister_savevm(DeviceState *dev, const char *idstr,
> void *opaque)
> @@ -1428,15 +1429,17 @@ static int vmstate_save(QEMUFile *f,
> SaveStateEntry *se)
> #define QEMU_VM_SUBSECTION 0x05
>
> int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int
> blk_enable,
> - int shared)
> + int shared, int use_xbrle,
> + int64_t xbrle_cache_size)
> {
> SaveStateEntry *se;
>
> QTAILQ_FOREACH(se, &savevm_handlers, entry) {
> if(se->set_params == NULL) {
> continue;
> - }
> - se->set_params(blk_enable, shared, se->opaque);
> + }
> + se->set_params(blk_enable, shared, use_xbrle,
> xbrle_cache_size,
> + se->opaque);
> }
>
> qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
> @@ -1577,7 +1580,7 @@ static int qemu_savevm_state(Monitor
> *mon, QEMUFile *f)
>
> bdrv_flush_all();
>
> - ret = qemu_savevm_state_begin(mon, f, 0, 0);
> + ret = qemu_savevm_state_begin(mon, f, 0, 0, 0, 0);
> if (ret < 0)
> goto out;
>
> diff --git a/sysemu.h b/sysemu.h
> index b81a70e..15a0664 100644
> --- a/sysemu.h
> +++ b/sysemu.h
> @@ -74,7 +74,8 @@ void qemu_announce_self(void);
> void main_loop_wait(int nonblocking);
>
> int qemu_savevm_state_begin(Monitor *mon, QEMUFile *f, int
> blk_enable,
> - int shared);
> + int shared, int use_xbrle,
> + int64_t xbrle_cache_size);
> int qemu_savevm_state_iterate(Monitor *mon, QEMUFile *f);
> int qemu_savevm_state_complete(Monitor *mon, QEMUFile *f);
> void qemu_savevm_state_cancel(Monitor *mon, QEMUFile *f);
>
>
>
>
- Re: [Qemu-devel] [PATCH] XBRLE page delta compression for live migration of large memory apps,
Shribman, Aidan <=