qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 9/9] block-migration: add named dirty bitmaps mi


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH 9/9] block-migration: add named dirty bitmaps migration
Date: Mon, 12 Jan 2015 17:48:10 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.3.0


On 11/12/2014 15:17, Vladimir Sementsov-Ogievskiy wrote:
> Just migrate parts of dirty bitmaps, corresponding to the block being
> migrated. Also, skip block migration if it is disabled (blk parameter
> of migrate command is false).

So I have misread the patch. blk here:

+static void blk_store_bitmaps(BlkMigBlock *blk)
+{
+    int i;
+    for (i = 0; i < blk->nr_bitmaps; ++i) {
+        bdrv_dbm_store_data(blk->dirty_bitmaps[i].bitmap,
+                            blk->dirty_bitmaps[i].buf,
+                            blk->sector, blk->nr_sectors);
+    }

refers to a BlkMigBlock, not a BlockBackend.

Still, I need an explanation of the operation of this patch.

During the bulk phase you migrate all of the dirty bitmap.  This is okay.

Then, during the iterative phase you migrate parts of the dirty bitmap
corresponding to sectors that are dirty for block migration.  This means
parts of the dirty bitmap that have become 1.

How do you track which parts of the disk have been acted upon (e.g.
mirrored in the case of the drive-mirror command), so that they have
become 0?

This, strictly speaking, is conservative so it is not incorrect, but it
basically throws away all the work done by the block job during the
block migration---which might take several minutes.  Is a non-live
solution really that bad?  If it is, and it is not feasible to just
migrate the bitmaps non-live, can we do better to migrate bits of the
dirty bitmap that have become 0?

Paolo

> Skipping shared sectors: bitmaps are migrated independently of this,
> just send blk without block data.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <address@hidden>
> ---
>  block-migration.c | 173 
> +++++++++++++++++++++++++++++++++++++++++++++++-------
>  savevm.c          |   1 +
>  2 files changed, 154 insertions(+), 20 deletions(-)
> 
> diff --git a/block-migration.c b/block-migration.c
> index 908a66d..95d54a1 100644
> --- a/block-migration.c
> +++ b/block-migration.c
> @@ -32,6 +32,8 @@
>  #define BLK_MIG_FLAG_EOS                0x02
>  #define BLK_MIG_FLAG_PROGRESS           0x04
>  #define BLK_MIG_FLAG_ZERO_BLOCK         0x08
> +#define BLK_MIG_FLAG_HAS_BLOCK          0x10
> +#define BLK_MIG_FLAG_HAS_BITMAPS        0x20
>  
>  #define MAX_IS_ALLOCATED_SEARCH 65536
>  
> @@ -51,6 +53,8 @@ typedef struct BlkMigDevState {
>      int shared_base;
>      int64_t total_sectors;
>      QSIMPLEQ_ENTRY(BlkMigDevState) entry;
> +    int nr_bitmaps;
> +    BdrvDirtyBitmap **dirty_bitmaps;
>  
>      /* Only used by migration thread.  Does not need a lock.  */
>      int bulk_completed;
> @@ -64,6 +68,11 @@ typedef struct BlkMigDevState {
>      Error *blocker;
>  } BlkMigDevState;
>  
> +typedef struct BlkMigDirtyBitmap {
> +    BdrvDirtyBitmap *bitmap;
> +    uint8_t *buf;
> +} BlkMigDirtyBitmap;
> +
>  typedef struct BlkMigBlock {
>      /* Only used by migration thread.  */
>      uint8_t *buf;
> @@ -74,6 +83,9 @@ typedef struct BlkMigBlock {
>      QEMUIOVector qiov;
>      BlockAIOCB *aiocb;
>  
> +    int nr_bitmaps;
> +    BlkMigDirtyBitmap *dirty_bitmaps;
> +
>      /* Protected by block migration lock.  */
>      int ret;
>      QSIMPLEQ_ENTRY(BlkMigBlock) entry;
> @@ -83,6 +95,7 @@ typedef struct BlkMigState {
>      /* Written during setup phase.  Can be read without a lock.  */
>      int blk_enable;
>      int shared_base;
> +    int dbm_enable;
>      QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
>      int64_t total_sector_sum;
>      bool zero_blocks;
> @@ -116,27 +129,63 @@ static void blk_mig_unlock(void)
>  /* Only allocating and initializing structure fields, not copying any data. 
> */
>  
>  static BlkMigBlock *blk_create(BlkMigDevState *bmds, int64_t sector,
> -                                int nr_sectors)
> +                               int nr_sectors, bool only_bitmaps)
>  {
> +    int i;
>      BlkMigBlock *blk = g_new(BlkMigBlock, 1);
> -    blk->buf = g_malloc(BLOCK_SIZE);
> +    blk->buf = only_bitmaps ? NULL : g_malloc(BLOCK_SIZE);
>      blk->bmds = bmds;
>      blk->sector = sector;
>      blk->nr_sectors = nr_sectors;
> +    blk->dirty_bitmaps = NULL;
> +    blk->nr_bitmaps = 0;
>  
>      blk->iov.iov_base = blk->buf;
>      blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
>      qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
>  
> +    if (block_mig_state.dbm_enable) {
> +        BlkMigDirtyBitmap *mig_bm;
> +
> +        blk->nr_bitmaps = bmds->nr_bitmaps;
> +        mig_bm = blk->dirty_bitmaps = g_new(BlkMigDirtyBitmap,
> +                                            bmds->nr_bitmaps);
> +        for (i = 0; i < bmds->nr_bitmaps; ++i) {
> +            BdrvDirtyBitmap *bm = bmds->dirty_bitmaps[i];
> +            mig_bm->buf = g_malloc(bdrv_dbm_data_size(bm, nr_sectors));
> +            mig_bm->bitmap = bm;
> +            mig_bm++;
> +        }
> +    }
> +
>      return blk;
>  }
>  
>  static void blk_free(BlkMigBlock *blk)
>  {
> +    int i;
>      g_free(blk->buf);
> +
> +    if (blk->dirty_bitmaps) {
> +        for (i = 0; i < blk->nr_bitmaps; ++i) {
> +            g_free(blk->dirty_bitmaps[i].buf);
> +        }
> +        g_free(blk->dirty_bitmaps);
> +    }
> +
>      g_free(blk);
>  }
>  
> +static void blk_store_bitmaps(BlkMigBlock *blk)
> +{
> +    int i;
> +    for (i = 0; i < blk->nr_bitmaps; ++i) {
> +        bdrv_dbm_store_data(blk->dirty_bitmaps[i].bitmap,
> +                            blk->dirty_bitmaps[i].buf,
> +                            blk->sector, blk->nr_sectors);
> +    }
> +}
> +
>  /* Must run outside of the iothread lock during the bulk phase,
>   * or the VM will stall.
>   */
> @@ -146,9 +195,15 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
>      int len;
>      uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
>  
> -    if (block_mig_state.zero_blocks &&
> +    if (block_mig_state.zero_blocks && blk->buf &&
>          buffer_is_zero(blk->buf, BLOCK_SIZE)) {
>          flags |= BLK_MIG_FLAG_ZERO_BLOCK;
> +    } else if (blk->buf) {
> +        flags |= BLK_MIG_FLAG_HAS_BLOCK;
> +    }
> +
> +    if (block_mig_state.dbm_enable) {
> +        flags |= BLK_MIG_FLAG_HAS_BITMAPS;
>      }
>  
>      /* sector number and flags */
> @@ -160,10 +215,27 @@ static void blk_send(QEMUFile *f, BlkMigBlock * blk)
>      qemu_put_byte(f, len);
>      qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len);
>  
> +    /* dirty bitmaps */
> +    if (flags & BLK_MIG_FLAG_HAS_BITMAPS) {
> +        int i;
> +        qemu_put_be32(f, blk->nr_bitmaps);
> +        for (i = 0; i < blk->nr_bitmaps; ++i) {
> +            BdrvDirtyBitmap *bm = blk->dirty_bitmaps[i].bitmap;
> +            int buf_size = bdrv_dbm_data_size(bm, blk->nr_sectors);
> +            const char *name = bdrv_dirty_bitmap_name(bm);
> +            int len = strlen(name);
> +
> +            qemu_put_byte(f, len);
> +            qemu_put_buffer(f, (const uint8_t *)name, len);
> +            qemu_put_buffer(f, blk->dirty_bitmaps[i].buf, buf_size);
> +        }
> +    }
> +
>      /* if a block is zero we need to flush here since the network
>       * bandwidth is now a lot higher than the storage device bandwidth.
> -     * thus if we queue zero blocks we slow down the migration */
> -    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
> +     * thus if we queue zero blocks we slow down the migration.
> +     * also, skip writing block when migrate only dirty bitmaps. */
> +    if (!(flags & BLK_MIG_FLAG_HAS_BLOCK)) {
>          qemu_fflush(f);
>          return;
>      }
> @@ -282,13 +354,20 @@ static int mig_save_device_bulk(QEMUFile *f, 
> BlkMigDevState *bmds)
>      BlockDriverState *bs = bmds->bs;
>      BlkMigBlock *blk;
>      int nr_sectors;
> +    bool skip_chunk = false;
>  
>      if (bmds->shared_base) {
>          qemu_mutex_lock_iothread();
> -        while (cur_sector < total_sectors &&
> -               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
> -                                  &nr_sectors)) {
> -            cur_sector += nr_sectors;
> +        if (block_mig_state.dbm_enable) {
> +            bdrv_is_allocated(bs, cur_sector, BDRV_SECTORS_PER_DIRTY_CHUNK,
> +                              &nr_sectors);
> +            skip_chunk = nr_sectors >= BDRV_SECTORS_PER_DIRTY_CHUNK;
> +        } else {
> +            while (cur_sector < total_sectors &&
> +                   !bdrv_is_allocated(bs, cur_sector, 
> MAX_IS_ALLOCATED_SEARCH,
> +                                      &nr_sectors)) {
> +                cur_sector += nr_sectors;
> +            }
>          }
>          qemu_mutex_unlock_iothread();
>      }
> @@ -309,7 +388,8 @@ static int mig_save_device_bulk(QEMUFile *f, 
> BlkMigDevState *bmds)
>          nr_sectors = total_sectors - cur_sector;
>      }
>  
> -    blk = blk_create(bmds, cur_sector, nr_sectors);
> +    blk = blk_create(bmds, cur_sector, nr_sectors,
> +                     skip_chunk || !block_mig_state.blk_enable);
>  
>      blk_mig_lock();
>      block_mig_state.submitted++;
> @@ -317,8 +397,16 @@ static int mig_save_device_bulk(QEMUFile *f, 
> BlkMigDevState *bmds)
>  
>      bdrv_reset_dirty_bitmap(bs, bmds->dirty_bitmap, cur_sector, nr_sectors);
>  
> -    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
> -                                nr_sectors, blk_mig_read_cb, blk);
> +    if (block_mig_state.dbm_enable) {
> +        blk_store_bitmaps(blk);
> +    }
> +
> +    if (block_mig_state.blk_enable && !skip_chunk) {
> +        blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
> +                                    nr_sectors, blk_mig_read_cb, blk);
> +    } else if (block_mig_state.dbm_enable) {
> +        blk_mig_read_cb(blk, 0);
> +    }
>  
>      bmds->cur_sector = cur_sector + nr_sectors;
>      return (bmds->cur_sector >= total_sectors);
> @@ -403,6 +491,8 @@ static void init_blk_migration(QEMUFile *f)
>              DPRINTF("Start full migration for %s\n", 
> bdrv_get_device_name(bs));
>          }
>  
> +        bmds->dirty_bitmaps = bdrv_dbm_find_all_named(bs, &bmds->nr_bitmaps);
> +
>          QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
>      }
>  }
> @@ -481,20 +571,32 @@ static int mig_save_device_dirty(QEMUFile *f, 
> BlkMigDevState *bmds,
>              } else {
>                  nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
>              }
> -            blk = blk_create(bmds, sector, nr_sectors);
> +            blk = blk_create(bmds, sector, nr_sectors,
> +                             !block_mig_state.blk_enable);
> +
> +            if (block_mig_state.dbm_enable) {
> +                blk_store_bitmaps(blk);
> +            }
>  
>              if (is_async) {
> -                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
> -                                            nr_sectors, blk_mig_read_cb, 
> blk);
> +                if (block_mig_state.blk_enable) {
> +                    blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
> +                                                nr_sectors, blk_mig_read_cb,
> +                                                blk);
> +                } else {
> +                    blk_mig_read_cb(blk, 0);
> +                }
>  
>                  blk_mig_lock();
>                  block_mig_state.submitted++;
>                  bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
>                  blk_mig_unlock();
>              } else {
> -                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
> -                if (ret < 0) {
> -                    goto error;
> +                if (block_mig_state.blk_enable) {
> +                    ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
> +                    if (ret < 0) {
> +                        goto error;
> +                    }
>                  }
>                  blk_send(f, blk);
>  
> @@ -817,10 +919,39 @@ static int block_load(QEMUFile *f, void *opaque, int 
> version_id)
>                  nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
>              }
>  
> +            /* load dirty bitmaps */
> +            if (flags & BLK_MIG_FLAG_HAS_BITMAPS) {
> +                int i, nr_bitmaps = qemu_get_be32(f);
> +
> +                for (i = 0; i < nr_bitmaps; ++i) {
> +                    char bitmap_name[256];
> +                    BdrvDirtyBitmap *bitmap;
> +                    int buf_size, len;
> +
> +                    len = qemu_get_byte(f);
> +                    qemu_get_buffer(f, (uint8_t *)bitmap_name, len);
> +                    bitmap_name[len] = '\0';
> +
> +                    bitmap = bdrv_find_dirty_bitmap(bs, bitmap_name);
> +                    if (!bitmap) {
> +                        fprintf(stderr, "Error unknown dirty bitmap\
> +                                %s for block device %s\n",
> +                                bitmap_name, device_name);
> +                        return -EINVAL;
> +                    }
> +
> +                    buf_size = bdrv_dbm_data_size(bitmap, nr_sectors);
> +                    buf = g_malloc(buf_size);
> +                    qemu_get_buffer(f, buf, buf_size);
> +                    bdrv_dbm_restore_data(bitmap, buf, addr, nr_sectors);
> +                    g_free(buf);
> +                }
> +            }
> +
>              if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
>                  ret = bdrv_write_zeroes(bs, addr, nr_sectors,
>                                          BDRV_REQ_MAY_UNMAP);
> -            } else {
> +            } else if (flags & BLK_MIG_FLAG_HAS_BLOCK) {
>                  buf = g_malloc(BLOCK_SIZE);
>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
>                  ret = bdrv_write(bs, addr, buf, nr_sectors);
> @@ -855,6 +986,7 @@ static void block_set_params(const MigrationParams 
> *params, void *opaque)
>  {
>      block_mig_state.blk_enable = params->blk;
>      block_mig_state.shared_base = params->shared;
> +    block_mig_state.dbm_enable = params->dirty;
>  
>      /* shared base means that blk_enable = 1 */
>      block_mig_state.blk_enable |= params->shared;
> @@ -862,7 +994,8 @@ static void block_set_params(const MigrationParams 
> *params, void *opaque)
>  
>  static bool block_is_active(void *opaque)
>  {
> -    return block_mig_state.blk_enable == 1;
> +    return block_mig_state.blk_enable == 1 ||
> +           block_mig_state.dbm_enable == 1;
>  }
>  
>  static SaveVMHandlers savevm_block_handlers = {
> diff --git a/savevm.c b/savevm.c
> index a598d1d..1299faa 100644
> --- a/savevm.c
> +++ b/savevm.c
> @@ -983,6 +983,7 @@ int qemu_loadvm_state(QEMUFile *f)
>          }
>      }
>  
> +    bdrv_dbm_restore_finish();
>      cpu_synchronize_all_post_init();
>  
>      ret = 0;
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]