

From: Juan Quintela
Subject: Re: [Qemu-devel] [PATCH v9 06/10] Add XBZRLE to ram_save_block and ram_save_live
Date: Wed, 18 Apr 2012 18:54:40 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.0.93 (gnu/linux)

Orit Wasserman <address@hidden> wrote:

> @@ -104,6 +104,7 @@ const uint32_t arch_type = QEMU_ARCH;
>  #define RAM_SAVE_FLAG_PAGE     0x08
>  #define RAM_SAVE_FLAG_EOS      0x10
>  #define RAM_SAVE_FLAG_CONTINUE 0x20
> +#define RAM_SAVE_FLAG_XBZRLE  0x40

missing space for alignment?

>  
>  #ifdef __ALTIVEC__
>  #include <altivec.h>
> @@ -165,6 +166,15 @@ static unsigned long cache_get_cache_pos(ram_addr_t address);
>  static CacheItem *cache_item_get(unsigned long pos, int item);
>  static void cache_resize(int64_t new_size);
>  
> +/* XBZRLE (Xor Based Zero Length Encoding */
> +typedef struct __attribute__((packed)) XBZRLEHeader {
> +    uint8_t xh_flags;
> +    uint16_t xh_len;
> +    uint32_t xh_cksum;
> +} XBZRLEHeader;

We shouldn't use packed here. Explanation later.
And if we are worried about space, it is much better to order the fields
by size:

typedef struct {
    uint32_t xh_cksum;
    uint16_t xh_len;
    uint8_t xh_flags;
} XBZRLEHeader;


While we are here: are we using the xh_cksum at all?


> +static uint8 *prev_cache_page;

Move this also to the bucket struct?

> @@ -359,19 +374,74 @@ static void save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
>  
>  }
>  
> +#define ENCODING_FLAG_XBZRLE 0x1
> +
> +static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
> +                            ram_addr_t current_addr, RAMBlock *block,
> +                            ram_addr_t offset, int cont)
> +{
> +    int cache_location = -1, slot = -1, encoded_len = 0, bytes_sent = 0;
> +    XBZRLEHeader hdr = {0};
> +    CacheItem *it;
> +    uint8_t *encoded_buf = NULL;
> +
> +    /* get location */
> +    slot = cache_is_cached(current_addr);
> +    if (slot == -1) {
> +        cache_insert(current_addr, current_data, 0);
> +        goto done;
> +    }
> +    cache_location = cache_get_cache_pos(current_addr);
> +
> +    /* abort if page changed too much */
> +    it = cache_item_get(cache_location, slot);

Comment and code don't match?

> +
> +    /* we need to copy the current data before encoding so it won't change
> +       during encoding. cache_insert copies the data.
> +    */
> +    memcpy(it->it_data, prev_cache_page, TARGET_PAGE_SIZE);
> +    cache_insert(current_addr, current_data, 0);
> +
> +    /* XBZRLE encoding (if there is no overflow) */
> +    encoded_buf = (uint8_t *) g_malloc(TARGET_PAGE_SIZE);

Cast not needed.  Can we have a page allocated for this in the Bucket
struct, XBZRLE struct or whatever we want to call it?  Having to do a
malloc for each page that we send looks excessive.
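Something like this (just a sketch, names invented here, not in the patch):

static uint8_t *xbzrle_encoded_buf;

static void xbzrle_alloc_buffers(void)
{
    /* called once per migration, e.g. next to cache_init() */
    xbzrle_encoded_buf = g_malloc(TARGET_PAGE_SIZE);
}

static void xbzrle_free_buffers(void)
{
    /* called once per migration, e.g. next to cache_fini() */
    g_free(xbzrle_encoded_buf);
    xbzrle_encoded_buf = NULL;
}

Then save_xbzrle_page() can just use xbzrle_encoded_buf and the per-page
g_malloc()/g_free() pair goes away.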

> +    encoded_len = encode_page(prev_cache_page, it->it_data, TARGET_PAGE_SIZE,
> +                              encoded_buf, TARGET_PAGE_SIZE);
> +    if (encoded_len < 0) {
> +        DPRINTF("Unmodifed page - skipping\n");
> +        goto done;
> +    }
> +
> +    hdr.xh_len = encoded_len;
> +    hdr.xh_flags |= ENCODING_FLAG_XBZRLE;
> +
> +    /* Send XBZRLE based compressed page */
> +    save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
> +    qemu_put_buffer(f, (uint8_t *) &hdr, sizeof(hdr));

This is wrong (same for the load part); we are breaking inter-arch migration.

qemu_put_byte(f, hdr.xh_flags);
qemu_put_be16(f, hdr.xh_len);
qemu_put_be32(f, hdr.xh_cksum);

And the inverse on the load side.
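For example (sketch, same field order as the put side):

hdr.xh_flags = qemu_get_byte(f);
hdr.xh_len = qemu_get_be16(f);
hdr.xh_cksum = qemu_get_be32(f);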

> +    qemu_put_buffer(f, encoded_buf, encoded_len);
> +    bytes_sent = encoded_len + sizeof(hdr);
> +
> +done:
> +    g_free(encoded_buf);
> +    return bytes_sent;
> +}
> +
>  static RAMBlock *last_block;
>  static ram_addr_t last_offset;
>  
> -static int ram_save_block(QEMUFile *f)
> +static int ram_save_block(QEMUFile *f, int stage)
>  {
>      RAMBlock *block = last_block;
>      ram_addr_t offset = last_offset;
>      int bytes_sent = 0;
>      MemoryRegion *mr;
> +    ram_addr_t current_addr;
> +    int use_xbzrle =  migrate_use_xbzrle();
>  
>      if (!block)
>          block = QLIST_FIRST(&ram_list.blocks);
>  
> +    current_addr = block->offset + offset;
> +
>      do {
>          mr = block->mr;
>          if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
> @@ -388,7 +458,14 @@ static int ram_save_block(QEMUFile *f)
>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_COMPRESS);
>                  qemu_put_byte(f, *p);
>                  bytes_sent = 1;
> -            } else {
> +            } else if (stage == 2 && use_xbzrle) {
here
> +                bytes_sent = save_xbzrle_page(f, p, current_addr, block,
> +                    offset, cont);
> +            } else if (use_xbzrle && stage == 1) {
and here, can we use the checks in the same order?

> +                cache_insert(current_addr, p, 0);
> +            }

Or even better, change the code to something like:

else if (use_xbzrle) {
     if (stage == 1) {
        cache_insert(current_addr, p, 0);
     } else if (stage == 2) {
        bytes_sent = save_xbzrle_page(...);
     }
}

And I don't understand why this function cares at all about the stages.
Normal code sends a page on stage == 1; here it is only saved in the
cache and sent as a normal page (without compression).  And on stage == 3
it is also always sent without compression.  I guess there is a method
to this, but a comment would help.

> +
> +            if (!bytes_sent) {
>                  save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
>                  qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
>                  bytes_sent = TARGET_PAGE_SIZE;
> @@ -492,6 +569,9 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>  
>      if (stage < 0) {
>          memory_global_dirty_log_stop();
> +        if (migrate_use_xbzrle()) {
> +            cache_fini();
> +        }
>          return 0;
>      }
>  
> @@ -504,6 +584,10 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>          last_offset = 0;
>          sort_ram_list();
>  
> +        if (migrate_use_xbzrle()) {
> +            cache_init(migrate_xbzrle_cache_size());
> +        }
> +
>          /* Make sure all dirty bits are set */
>          QLIST_FOREACH(block, &ram_list.blocks, next) {
>              for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
> @@ -531,9 +615,11 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)

>      while ((ret = qemu_file_rate_limit(f)) == 0) {
>          int bytes_sent;
>  
> -        bytes_sent = ram_save_block(f);
> -        bytes_transferred += bytes_sent;
> -        if (bytes_sent == 0) { /* no more blocks */
> +        bytes_sent = ram_save_block(f, stage);
> +        /* bytes_sent -1 represent unchanged page */
Interesting representation.  Yes, I know that zero is already used (it
would be the obvious choice here).  Perhaps switch it with the old zero
value, and let -1 mean no more blocks?
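Just to illustrate (sketch only, not part of the patch), the caller would
then look like:

        bytes_sent = ram_save_block(f, stage);
        if (bytes_sent == -1) {          /* no more blocks */
            break;
        }
        /* 0 now means "page unchanged, nothing sent", adding it is harmless */
        bytes_transferred += bytes_sent;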

> +        if (bytes_sent > 0) {
> +            bytes_transferred += bytes_sent;
> +        } else if (bytes_sent == 0) { /* no more blocks */
>              break;
>          }
>      }
> @@ -556,19 +642,67 @@ int ram_save_live(QEMUFile *f, int stage, void *opaque)
>          int bytes_sent;
>  
>          /* flush all remaining blocks regardless of rate limiting */
> -        while ((bytes_sent = ram_save_block(f)) != 0) {
> +        while ((bytes_sent = ram_save_block(f, stage)) != 0) {
>              bytes_transferred += bytes_sent;
>          }
>          memory_global_dirty_log_stop();
> +        if (migrate_use_xbzrle()) {
> +            cache_fini();
> +        }
>      }

Not that this problem is introduced by this series, but could we get
this into a function migration_end()/fini() that is just called in the
two places where a migration can end?
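Something like (sketch only, the name is just a suggestion):

static void migration_end(void)
{
    memory_global_dirty_log_stop();
    if (migrate_use_xbzrle()) {
        cache_fini();
    }
}

and then both the stage < 0 path and the stage == 3 path just call it.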

>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>  
>      expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
>  
> +    DPRINTF("ram_save_live: expected(%ld) <= max(%ld)?\n", expected_time,
> +        migrate_max_downtime());
> +
>      return (stage == 2) && (expected_time <= migrate_max_downtime());
>  }
>  
> +static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
> +{
> +    int ret, rc = -1;
> +    uint8_t *xbzrle_buf = NULL;
> +    XBZRLEHeader hdr = {0};
> +
> +    /* extract RLE header */
> +    qemu_get_buffer(f, (uint8_t *) &hdr, sizeof(hdr));

This needs to be done by hand, as noted above.

> +    if (!(hdr.xh_flags & ENCODING_FLAG_XBZRLE)) {
> +        fprintf(stderr, "Failed to load XBZRLE page - wrong compression!\n");
> +        goto done;
> +    }
> +
> +    if (hdr.xh_len > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - len overflow!\n");
> +        goto done;
> +    }
> +
> +    /* load data and decode */
> +    xbzrle_buf = (uint8_t *) g_malloc0(TARGET_PAGE_SIZE);

Cast not needed.  Again, this allocation only needs to be done _once_ for
the whole migration.

> +    qemu_get_buffer(f, xbzrle_buf, hdr.xh_len);
> +
> +    /* decode RLE */
> +    ret = decode_page(xbzrle_buf, hdr.xh_len, host, TARGET_PAGE_SIZE);
> +    if (ret == -1) {
> +        fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
> +        goto done;
> +    }
> +
> +    if (ret > TARGET_PAGE_SIZE) {
> +        fprintf(stderr, "Failed to load XBZRLE page - size %d exceeds %d!\n",
> +                ret, TARGET_PAGE_SIZE);
> +        goto done;
> +    }
> +
> +    rc = 0;
> +
> +done:
> +    g_free(xbzrle_buf);
> +    return rc;
> +}
> +
>  static inline void *host_from_stream_offset(QEMUFile *f,
>                                              ram_addr_t offset,
>                                              int flags)
> @@ -614,14 +748,18 @@ static inline void *host_from_stream_offset_versioned(int version_id,
>  int ram_load(QEMUFile *f, void *opaque, int version_id)
>  {
>      ram_addr_t addr;
> -    int flags;
> +    int flags, ret = 0;
>      int error;
> +    static uint64_t seq_iter;
> +
> +    seq_iter++;
>  
>      if (version_id < 4 || version_id > 4) {
>          return -EINVAL;
>      }
>  
>      do {
> +        void *host;
>          addr = qemu_get_be64(f);
>  
>          flags = addr & ~TARGET_PAGE_MASK;
> @@ -645,8 +783,10 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>  
>                      QLIST_FOREACH(block, &ram_list.blocks, next) {
>                          if (!strncmp(id, block->idstr, sizeof(id))) {
> -                            if (block->length != length)
> -                                return -EINVAL;
> +                            if (block->length != length) {
> +                                ret =  -EINVAL;
> +                                goto done;
> +                            }
>                              break;
>                          }
>                      }
> @@ -654,7 +794,8 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>                      if (!block) {
>                          fprintf(stderr, "Unknown ramblock \"%s\", cannot "
>                                  "accept migration\n", id);
> -                        return -EINVAL;
> +                        ret = -EINVAL;
> +                        goto done;
>                      }
>  
>                      total_ram_bytes -= length;
> @@ -693,14 +834,25 @@ int ram_load(QEMUFile *f, void *opaque, int version_id)
>                  return -EINVAL;
>              }
>              qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
> +        } else if (flags & RAM_SAVE_FLAG_XBZRLE) {
> +            host = host_from_stream_offset_versioned(version_id,
> +                            f, addr, flags);
> +            if (load_xbzrle(f, addr, host) < 0) {
> +                ret = -EINVAL;
> +                goto done;
> +            }

Why don't we need to check that host is NULL in this case?
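If it is needed, I would expect something like the check in the other
branches (sketch):

            if (!host) {
                ret = -EINVAL;
                goto done;
            }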

[ I assume that encode/decode functions work ok, head hurts]

Can we change the names to xbzrle_page_encode()/decode()?  The current
names sound too general.  As they are not really tied to pages at all,
we could also call them xbzrle_buffer_encode()/decode().

Thanks, Juan.


