qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 1/2] Make cow_co_is_allocated and cow_update_bit


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH 1/2] Make cow_co_is_allocated and cow_update_bitmap more efficient
Date: Tue, 20 Aug 2013 22:48:40 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130805 Thunderbird/17.0.8

Il 20/08/2013 20:34, Charlie Shepherd ha scritto:
> cow_co_is_allocated and cow_update_bitmap set bits by reading the relevant
> word, setting the specific bit in it and writing it back. These functions set
> a number of contiguous bits however, so this is an extremely inefficient way
> of doing this. This patch converts them to read the whole bitmap they need in
> one go, update it and then write it out, which should be much more more
> efficient.
> 
> This patch has a startling effect on the speed reads and writes, as measured
> by a simple benchmark. This performance increase could likely still
> be improved quite easily.
> 
> Before:
> $ qemu-io -c 'write 0 1M' test.cow
> Average over 3 runs: 34Kb/s
> 
> $ qemu-io -c 'read 0 1M' test.cow
> Average over 3 runs:28Mb/s
> 
> $ qemu-io -c 'read 0 100M' test.cow
> Average over 3 runs:28Mb/s
> 
> After:
> $ qemu-io -c 'write 0 1M' test.cow
> Average over 3 runs: 10 Mb/s
> 
> $ qemu-io -c 'read 0 1M' test.cow
> Average over 3 runs: 351 Mb/s
> 
> $ qemu-io -c 'read 0 100M' test.cow
> Average over 3 runs: 426 Mb/s
> 
> Which is a ~300x increase in write speed and a ~10x increase in read speed.
> 
> Signed-off-by: Charlie Shepherd <address@hidden>
> ---
>  block/cow.c | 119 
> ++++++++++++++++++++++++++++++++++--------------------------
>  1 file changed, 68 insertions(+), 51 deletions(-)
> 
> diff --git a/block/cow.c b/block/cow.c
> index 1cc2e89..cd411ec 100644
> --- a/block/cow.c
> +++ b/block/cow.c
> @@ -102,42 +102,10 @@ static int cow_open(BlockDriverState *bs, QDict 
> *options, int flags)
>      return ret;
>  }
>  
> -/*
> - * XXX(hch): right now these functions are extremely inefficient.
> - * We should just read the whole bitmap we'll need in one go instead.
> - */
> -static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)
> -{
> -    uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
> -    uint8_t bitmap;
> -    int ret;
> -
> -    ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
> -    if (ret < 0) {
> -       return ret;
> -    }
> -
> -    bitmap |= (1 << (bitnum % 8));
> -
> -    ret = bdrv_pwrite_sync(bs->file, offset, &bitmap, sizeof(bitmap));
> -    if (ret < 0) {
> -       return ret;
> -    }
> -    return 0;
> -}
> -
> -static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
> +/* Cannot use bitmap.c on big-endian machines.  */
> +static int cow_test_bit(int64_t bitnum, const uint8_t *bitmap)
>  {
> -    uint64_t offset = sizeof(struct cow_header_v2) + bitnum / 8;
> -    uint8_t bitmap;
> -    int ret;
> -
> -    ret = bdrv_pread(bs->file, offset, &bitmap, sizeof(bitmap));
> -    if (ret < 0) {
> -       return ret;
> -    }
> -
> -    return !!(bitmap & (1 << (bitnum % 8)));
> +    return (bitmap[bitnum / 8] & (1 << (bitnum & 7))) != 0;
>  }
>  
>  /* Return true if first block has been changed (ie. current version is
> @@ -146,40 +114,82 @@ static inline int is_bit_set(BlockDriverState *bs, 
> int64_t bitnum)
>  static int coroutine_fn cow_co_is_allocated(BlockDriverState *bs,
>          int64_t sector_num, int nb_sectors, int *num_same)
>  {
> -    int changed;
> +    int ret, changed;
> +    uint64_t offset = sizeof(struct cow_header_v2) + sector_num / 8;
> +
> +    int init_bits = (sector_num % 8) ? (8 - (sector_num % 8)) : 0;
> +    int remaining = sector_num - init_bits;
> +    int full_bytes = remaining / 8;
> +    int trail = remaining % 8;
> +
> +    int len = !!init_bits + full_bytes + !!trail;
> +    uint8_t bitmap[len];

This is a basically unbounded allocation on the stack.  You should split
this in smaller ranges using the "num_same" argument, which is what I
did in my patch.

>      if (nb_sectors == 0) {
> -     *num_same = nb_sectors;
> -     return 0;
> +        *num_same = nb_sectors;
> +        return 0;
>      }
>  
> -    changed = is_bit_set(bs, sector_num);
> -    if (changed < 0) {
> -        return 0; /* XXX: how to return I/O errors? */
> +    ret = bdrv_pread(bs->file, offset, bitmap, len);
> +    if (ret < 0) {
> +        return ret;
>      }
>  
> +    changed = cow_test_bit(sector_num, bitmap);
>      for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
> -     if (is_bit_set(bs, sector_num + *num_same) != changed)
> -         break;
> +        if (cow_test_bit(sector_num + *num_same, bitmap) != changed) {
> +            break;
> +        }
>      }
>  
>      return changed;
>  }
>  
> +/* Set the bits from sector_num to sector_num + nb_sectors in the bitmap of
> + * bs->file. */
>  static int cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
>          int nb_sectors)
>  {
> -    int error = 0;
> -    int i;
> +    int ret;
> +    uint64_t offset = sizeof(struct cow_header_v2) + sector_num / 8;
>  
> -    for (i = 0; i < nb_sectors; i++) {
> -        error = cow_set_bit(bs, sector_num + i);
> -        if (error) {
> -            break;
> -        }
> +    int init_bits = (sector_num % 8) ? (8 - (sector_num % 8)) : 0;
> +    int remaining = sector_num - init_bits;
> +    int full_bytes = remaining / 8;
> +    int trail = remaining % 8;
> +
> +    int len = !!init_bits + full_bytes + !!trail;
> +    uint8_t buf[len];

Here your patch has indeed an improvement over mine.  However, this is
another basically unbounded allocation on the stack.  You should split
bitmap updates in smaller parts (doing 512-byte aligned writes is fine,
each covers 2MB in the file and writes this big are very rare!).

> +    ret = bdrv_pread(bs->file, offset, buf, len);
> +    if (ret < 0) { 
> +        return ret;
> +    }
> +
> +    /* Do sector_num -> nearest byte boundary */
> +    if (init_bits) {
> +        /* This sets the highest init_bits bits in the byte */
> +        uint8_t bits = ((1 << init_bits) - 1) << (8 - init_bits);
> +        buf[0] |= bits;
> +    }
> +
> +    if (full_bytes) {
> +        memset(&buf[!!init_bits], ~0, full_bytes);
> +    }
> +
> +    /* Set the trailing bits in the final byte */
> +    if (trail) {
> +        /* This sets the lowest trail bits in the byte */
> +        uint8_t bits = (1 << trail) - 1;
> +        buf[len - 1] |= bits;
> +    }

... and you should also check if there is a change in the bits, and skip
the flush if there is no change.  Flushing a multi-megabyte write is
very expensive.  It basically makes format=cow as slow as
format=raw,cache=writethrough.

> +    ret = bdrv_pwrite(bs->file, offset, buf, len);
> +    if (ret < 0) {
> +        return ret;
>      }
>  
> -    return error;
> +    return 0;
>  }
>  
>  static int coroutine_fn cow_read(BlockDriverState *bs, int64_t sector_num,
> @@ -237,6 +247,13 @@ static int cow_write(BlockDriverState *bs, int64_t 
> sector_num,
>          return ret;
>      }
>  
> +    /* We need to flush the data before writing the metadata so that there is
> +     * no chance of metadata referring to data that doesn't exist. */
> +    ret = bdrv_flush(bs->file);
> +    if (ret < 0) {
> +        return ret;
> +    }

See above about this flush.

Paolo

>      return cow_update_bitmap(bs, sector_num, nb_sectors);
>  }
>  
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]