qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 17/19] block/parallels: delay writing to BAT til


From: Roman Kagan
Subject: Re: [Qemu-devel] [PATCH 17/19] block/parallels: delay writing to BAT till bdrv_co_flush_to_os
Date: Wed, 14 Jan 2015 16:03:40 +0300
User-agent: Mutt/1.5.23 (2014-03-12)

On Tue, Dec 30, 2014 at 01:07:10PM +0300, Denis V. Lunev wrote:
> The idea is that we do not need to immediately sync BAT to the image as
> from the guest point of view there is a possibility that IO is lost
> even in the physical controller until flush command was finished.
> bdrv_co_flush_to_os is exactly the right place for this purpose.
> 
> Technically the patch aligns writes into BAT to MAX(bdrv_align, 4096),
> which elliminates read-modify-write transactions on BAT update and
> cache ready-to-write content in a special buffer in BDRVParallelsState.
> 
> This buffer possibly contains ParallelsHeader if the first page of the
> image should be modified. The header occupies first 64 bytes of the image
> and the BAT starts immediately after it.
> 
> It is also possible that BAT end is not aligned to the cluster size.
> ParallelsHeader->data_off is not specified for this case. We should write
> only part of the cache in that case.
> 
> This patch speed ups
>   qemu-img create -f parallels -o cluster_size=64k ./1.hds 64G
>   qemu-io -f parallels -c "write -P 0x11 0 1024k" 1.hds
> writing from 50-60 Mb/sec to 80-90 Mb/sec on rotational media and
> from 160 Mb/sec to 190 Mb/sec on SSD disk.
> 
> Signed-off-by: Denis V. Lunev <address@hidden>
> CC: Kevin Wolf <address@hidden>
> CC: Stefan Hajnoczi <address@hidden>
> ---
>  block/parallels.c | 99 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 94 insertions(+), 5 deletions(-)
> 
> diff --git a/block/parallels.c b/block/parallels.c
> index 46cf031..18b9267 100644
> --- a/block/parallels.c
> +++ b/block/parallels.c
> @@ -62,6 +62,11 @@ typedef struct BDRVParallelsState {
>      uint32_t *bat;
>      unsigned int bat_size;
>  
> +    uint32_t *bat_cache;
> +    unsigned bat_cache_size;
> +    int bat_cache_off;
> +    int data_off;
> +
>      unsigned int tracks;
>  
>      unsigned int off_multiplier;
> @@ -148,6 +153,17 @@ static int parallels_open(BlockDriverState *bs, QDict 
> *options, int flags,
>          le32_to_cpus(&s->bat[i]);
>  
>      qemu_co_mutex_init(&s->lock);
> +
> +    s->bat_cache_off = -1;
> +    if (bs->open_flags & BDRV_O_RDWR) {
> +        s->bat_cache_size = MAX(bdrv_opt_mem_align(bs->file), 4096);
> +        s->bat_cache = qemu_blockalign(bs->file, s->bat_cache_size);
> +    }
> +    s->data_off = le32_to_cpu(s->ph.data_off) * BDRV_SECTOR_SIZE;
> +    if (s->data_off == 0) {
> +        s->data_off = ROUND_UP(bat_offset(s->bat_size), BDRV_SECTOR_SIZE);
> +    }
> +
>      return 0;
>  
>  fail_format:
> @@ -171,10 +187,71 @@ static int64_t seek_to_sector(BDRVParallelsState *s, 
> int64_t sector_num)
>      return (uint64_t)s->bat[index] * s->off_multiplier + offset;
>  }
>  
> +static int write_bat_cache(BlockDriverState *bs)
> +{
> +    BDRVParallelsState *s = bs->opaque;
> +    int size, off;
> +
> +    if (s->bat_cache_off == -1) {
> +        /* no cached data */
> +        return 0;
> +    }
> +
> +    size = s->bat_cache_size;
> +    if (size + s->bat_cache_off > s->data_off) {
> +        /* avoid writing to the first data block */
> +        size = s->data_off - s->bat_cache_off;
> +    }
> +
> +    off = s->bat_cache_off;
> +    s->bat_cache_off = -1;
> +    return bdrv_pwrite(bs->file, off, s->bat_cache, size);
> +}
> +
> +static int cache_bat(BlockDriverState *bs, uint32_t idx, uint32_t 
> new_data_off)
> +{
> +    int ret, i, off, cache_off;
> +    int64_t first_idx, last_idx;
> +    BDRVParallelsState *s = bs->opaque;
> +    uint32_t *cache = s->bat_cache;
> +
> +    off = bat_offset(idx);
> +    cache_off = (off / s->bat_cache_size) * s->bat_cache_size;
> +
> +    if (s->bat_cache_off != -1 && s->bat_cache_off != cache_off) {
> +        ret = write_bat_cache(bs);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    first_idx = idx - (off - cache_off) / sizeof(uint32_t);
> +    last_idx = first_idx + s->bat_cache_size / sizeof(uint32_t);
> +    if (first_idx < 0) {
> +        memcpy(s->bat_cache, &s->ph, sizeof(s->ph));
> +        first_idx = 0;
> +        cache = s->bat_cache + sizeof(s->ph) / sizeof(uint32_t);
> +    }
> +
> +    if (last_idx > s->bat_size) {
> +        memset(cache + s->bat_size - first_idx, 0,
> +               sizeof(uint32_t) * (last_idx - s->bat_size));
> +    }
> +
> +    for (i = 0; i < last_idx - first_idx; i++) {
> +        cache[i] = cpu_to_le32(s->bat[first_idx + i]);
> +    }

You're re-populating the bat_cache on every bat update.  Why?

Shouldn't this be done only with empty cache, i.e. under
if (s->bat_cache_off == -1)?

> +    cache[idx - first_idx] = cpu_to_le32(new_data_off);
> +    s->bat[idx] = new_data_off;
> +
> +    s->bat_cache_off = cache_off;
> +    return 0;
> +}
> +
>  static int64_t allocate_sector(BlockDriverState *bs, int64_t sector_num)
>  {
>      BDRVParallelsState *s = bs->opaque;
> -    uint32_t idx, offset, tmp;
> +    uint32_t idx, offset;
>      int64_t pos;
>      int ret;
>  
> @@ -190,17 +267,27 @@ static int64_t allocate_sector(BlockDriverState *bs, 
> int64_t sector_num)
>  
>      pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS;
>      bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS);
> -    s->bat[idx] = pos / s->off_multiplier;
> -
> -    tmp = cpu_to_le32(s->bat[idx]);
>  
> -    ret = bdrv_pwrite(bs->file, bat_offset(idx), &tmp, sizeof(tmp));
> +    ret = cache_bat(bs, idx, pos / s->off_multiplier);
>      if (ret < 0) {
>          return ret;
>      }
>      return (uint64_t)s->bat[idx] * s->off_multiplier + offset;
>  }
>  
> +static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
> +{
> +    BDRVParallelsState *s = bs->opaque;
> +    int ret;
> +
> +    qemu_co_mutex_lock(&s->lock);
> +    ret = write_bat_cache(bs);
> +    qemu_co_mutex_unlock(&s->lock);
> +
> +    return ret;
> +}
> +
> +
>  static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
>          int nb_sectors)
>  {
> @@ -387,6 +474,7 @@ exit:
>  static void parallels_close(BlockDriverState *bs)
>  {
>      BDRVParallelsState *s = bs->opaque;
> +    qemu_vfree(s->bat_cache);

Don't you need to flush the bat cache here first?

>      g_free(s->bat);
>  }
>  
> @@ -416,6 +504,7 @@ static BlockDriver bdrv_parallels = {
>      .bdrv_open               = parallels_open,
>      .bdrv_close              = parallels_close,
>      .bdrv_co_get_block_status = parallels_co_get_block_status,
> +    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
>      .bdrv_co_readv  = parallels_co_readv,
>      .bdrv_co_writev = parallels_co_writev,

Roman.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]