qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2] support add-cow format


From: Kevin Wolf
Subject: Re: [Qemu-devel] [PATCH v2] support add-cow format
Date: Fri, 23 Sep 2011 14:20:39 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:6.0.2) Gecko/20110906 Thunderbird/6.0.2

Am 21.09.2011 10:56, schrieb Dong Xu Wang:
> ubuntu.img is a disk image which has been installed OS.
> (1) Create a raw image with the same size of ubuntu.img
> qemu-img create -f raw test.raw 8G
> (2) Create a add-cow image which will store dirty bitmap
> qemu-img create -f add-cow test.add-cow -o 
> backing_file=ubuntu.img,image_file=test.raw
> (3) Run qemu with add-cow image
> qemu -drive if=virtio,file=test.add-cow -m 1024
> 
> Signed-off-by: Dong Xu Wang <address@hidden>
> ---
>  Makefile.objs   |    1 +
>  block.c         |    4 +-
>  block.h         |    1 +
>  block/add-cow.c |  403 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block_int.h     |    1 +
>  qemu-img.c      |    2 +-
>  6 files changed, 409 insertions(+), 3 deletions(-)
>  create mode 100644 block/add-cow.c
> 
> diff --git a/Makefile.objs b/Makefile.objs
> index 1c65087..2521f0d 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -31,6 +31,7 @@ block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
>  
>  block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o 
> vpc.o vvfat.o
>  block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
> qcow2-cache.o
> +block-nested-y += add-cow.o
>  block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>  block-nested-y += qed-check.o
>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
> diff --git a/block.c b/block.c
> index e3fe97f..66f2d39 100644
> --- a/block.c
> +++ b/block.c
> @@ -106,7 +106,7 @@ int is_windows_drive(const char *filename)
>  #endif
>  
>  /* check if the path starts with "<protocol>:" */
> -static int path_has_protocol(const char *path)
> +int path_has_protocol(const char *path)
>  {
>  #ifdef _WIN32
>      if (is_windows_drive(path) ||
> @@ -875,7 +875,7 @@ int bdrv_commit(BlockDriverState *bs)
>  
>      if (!drv)
>          return -ENOMEDIUM;
> -    
> +
>      if (!bs->backing_hd) {
>          return -ENOTSUP;
>      }

This hunk is unrelated.

> diff --git a/block.h b/block.h
> index 16bfa0a..8b09f12 100644
> --- a/block.h
> +++ b/block.h
> @@ -256,6 +256,7 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, 
> QEMUSnapshotInfo *sn);
>  
>  char *get_human_readable_size(char *buf, int buf_size, int64_t size);
>  int path_is_absolute(const char *path);
> +int path_has_protocol(const char *path);
>  void path_combine(char *dest, int dest_size,
>                    const char *base_path,
>                    const char *filename);
> diff --git a/block/add-cow.c b/block/add-cow.c
> new file mode 100644
> index 0000000..d27be6e
> --- /dev/null
> +++ b/block/add-cow.c
> @@ -0,0 +1,403 @@
> +#include "qemu-common.h"
> +#include "block_int.h"
> +#include "module.h"
> +
> +#define ADD_COW_MAGIC  (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | \
> +                        ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \
> +                        ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \
> +                        ((uint64_t)'W' << 8) | 0xFF)
> +#define ADD_COW_VERSION 1
> +
> +typedef struct AddCowHeader {
> +    uint64_t magic;
> +    uint32_t version;
> +    char backing_file[1024];
> +    char image_file[1024];
> +    uint64_t size;
> +    uint32_t sectorsize;
> +} QEMU_PACKED AddCowHeader;

sectorsize is unused. What is it meant to be used for?

I think it would also make sense to write a file format specification
that can be included. docs/specs/qcow2.txt is an example of what this
could look like.

> +
> +typedef struct BDRVAddCowState {
> +    CoMutex lock;
> +    CoMutex bitmap_lock;
> +    char image_file[1024];
> +    BlockDriverState *image_hd;
> +    uint8_t *bitmap;
> +    uint64_t bitmap_size;
> +} BDRVAddCowState;
> +
> +static int add_cow_probe(const uint8_t *buf, int buf_size, const char 
> *filename)
> +{
> +    const AddCowHeader *header = (const void *)buf;
> +
> +    if (be64_to_cpu(header->magic) == ADD_COW_MAGIC &&
> +        be32_to_cpu(header->version) == ADD_COW_VERSION) {
> +        return 100;
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +static int add_cow_open(BlockDriverState *bs, int flags)
> +{
> +    AddCowHeader header;
> +    int64_t size;
> +    char image_filename[1024];
> +    int image_flags;
> +    BlockDriver *image_drv = NULL;
> +    int ret;
> +    BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque);
> +
> +    if (bdrv_pread(bs->file, 0, &header, sizeof(header)) != sizeof(header)) {
> +        goto fail;
> +    }
> +
> +    if (be64_to_cpu(header.magic) != ADD_COW_MAGIC ||
> +        be32_to_cpu(header.version) != ADD_COW_VERSION) {
> +        goto fail;
> +    }
> +
> +    size = be64_to_cpu(header.size);
> +    bs->total_sectors = size / 512;

Please use BDRV_SECTOR_SIZE instead of 512.

> +
> +    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
> +            header.backing_file);
> +    pstrcpy(state->image_file, sizeof(state->image_file),
> +            header.image_file);

This assumes that state->image_file and header.image_file are of the
same size. We could use QEMU_BUILD_BUG_ON to verify this.

> +
> +    state->bitmap_size = ((bs->total_sectors + 7) >> 3);
> +    if (!state->bitmap) {
> +        state->bitmap = g_malloc0(state->bitmap_size);
> +    }

Is there any case where state->bitmap != NULL, so that the allocation
would not be executed?

> +    if (bdrv_pread(bs->file, sizeof(header), state->bitmap,  \

Unnecessary backslash.

> +        state->bitmap_size) != state->bitmap_size) {
> +        goto fail;
> +    }
> +   /* if there is a image_file, must be together with backing_file */
> +    if (state->image_file[0] != '\0') {

What happens if there is no image file? Shouldn't it be an error?

> +        state->image_hd = bdrv_new("");
> +        if (path_has_protocol(state->image_file)) {
> +            pstrcpy(image_filename, sizeof(image_filename),
> +                    state->image_file);
> +        } else {
> +            path_combine(image_filename, sizeof(image_filename),
> +                         bs->filename, state->image_file);
> +        }

This part makes the image file path in the add-cow file relative to the
image instead of relative to the working directory. Backing files behave
the same, but they have also shown that this behaviour is confusing users.

We need to discuss if it is a good idea to do it with add-cow.

> +
> +        image_drv = bdrv_find_format("raw");
> +        image_flags =
> +             (flags & (~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING))) | 
> BDRV_O_RDWR;
> +        state->image_hd->keep_read_only = 0;
> +
> +        ret = bdrv_open(state->image_hd, image_filename, image_flags, \
> +                image_drv);
> +        if (ret < 0) {
> +            bdrv_close(bs);
> +            goto fail;
> +        }
> +    }
> +    return 0;
> + fail:
> +    if (state->bitmap) {
> +        g_free(state->bitmap);
> +        state->bitmap = NULL;
> +    }
> +    return -1;

This shouldn't be -1, but a negative errno value. If the reason for
failure was a function like bdrv_pread() that returns an error code,
this error code should be passed on.

> +}
> +
> +static inline void add_cow_set_bit(BlockDriverState *bs, int64_t bitnum)
> +{
> +    uint64_t offset = sizeof(AddCowHeader) + bitnum / 8;
> +    uint8_t bitmap;
> +    BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque);
> +
> +    qemu_co_mutex_lock(&state->bitmap_lock);
> +    bitmap = (state->bitmap[offset]) |= (1 << (bitnum % 8));
> +    qemu_co_mutex_unlock(&state->bitmap_lock);
> +}

Variable bitmap seems to be unused. For the locking, see below.

> +
> +static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
> +{
> +    BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque);
> +    qemu_co_mutex_lock(&state->bitmap_lock);
> +    uint64_t offset = sizeof(AddCowHeader) + bitnum / 8;
> +    qemu_co_mutex_unlock(&state->bitmap_lock);
> +
> +    return !!(state->bitmap[offset] & (1 << (bitnum % 8)));
> +}

Your locks remain without effect because they will always be free when
you come here (you don't yield anywhere while holding the lock). So
leaving them out is safe.

That said, even when assuming real threads instead of coroutines I can't
see what you're protecting here...

> +
> +static int add_cow_is_allocated(BlockDriverState *bs, int64_t sector_num,
> +        int nb_sectors, int *num_same)
> +{
> +    int changed;
> +
> +    if (nb_sectors == 0) {
> +        *num_same = nb_sectors;
> +        return 0;
> +    }
> +
> +    changed = is_bit_set(bs, sector_num);
> +    if (changed < 0) {
> +        return 0;
> +    }
> +
> +    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
> +        if (is_bit_set(bs, sector_num + *num_same) != changed) {
> +            break;
> +        }
> +    }
> +
> +    return changed;
> +}

You need to check sector_num and nb_sectors so that you don't run beyond
the end of your bitmap.

> +
> +static int add_cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
> +        int nb_sectors)
> +{
> +    int error = 0;
> +    int i, ret;
> +    BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque);
> +
> +    for (i = 0; i < nb_sectors; i++) {
> +        add_cow_set_bit(bs, sector_num + i);
> +    }
> +    ret = bdrv_pwrite_sync(bs->file, sizeof(AddCowHeader), \
> +            state->bitmap, state->bitmap_size);

It would be nicer to write out only a part of the bitmap instead of the
whole thing. I think it can become quite large.

On that note, why did you choose 512 bytes as the unit in which you
track COW? For example, qcow2 and QED have a default cluster size of 64k
which is much larger.


> +    if (ret < 0) {
> +        return ret;
> +    }
> +    return error;
> +}
> +
> +static void add_cow_close(BlockDriverState *bs)
> +{
> +    BDRVAddCowState *state = (BDRVAddCowState *)(bs->opaque);
> +    if (state->bitmap) {
> +        g_free(state->bitmap);
> +        state->bitmap = NULL;
> +    }
> +}
> +
> +static int add_cow_create(const char *filename, QEMUOptionParameter *options)
> +{
> +    AddCowHeader header;
> +    int64_t image_sectors = 0;
> +    const char *backing_filename = NULL;
> +    const char *image_filename = NULL;
> +    int ret;
> +    BlockDriverState *bs, *image_bs = NULL;
> +
> +    while (options && options->name) {
> +        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
> +            image_sectors = options->value.n / 512;
> +        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
> +            backing_filename = options->value.s;
> +        } else if (!strcmp(options->name, BLOCK_OPT_IMAGE_FILE)) {
> +            image_filename = options->value.s;
> +        }
> +        options++;
> +    }
> +
> +    if (!backing_filename || !image_filename) {
> +        return -EINVAL;
> +    }
> +    /* Make sure image file exists */
> +    BlockDriver *raw_drv = bdrv_find_format("raw");

If you need to access the raw driver directly, you're doing something
wrong. But it looks unused anyway.

> +    assert(raw_drv != NULL);
> +    ret = bdrv_file_open(&image_bs, image_filename, BDRV_O_RDWR | \
> +            BDRV_O_CACHE_WB);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +    bdrv_close(image_bs);
> +    ret = bdrv_create_file(filename, NULL);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    ret = bdrv_file_open(&bs, filename, BDRV_O_RDWR);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    memset(&header, 0, sizeof(header));
> +    header.magic = cpu_to_be64(ADD_COW_MAGIC);
> +    header.version = cpu_to_be32(ADD_COW_VERSION);
> +    pstrcpy(header.backing_file, \
> +                sizeof(header.backing_file), backing_filename);
> +    pstrcpy(header.image_file, sizeof(header.image_file),
> +                image_filename);
> +
> +    header.sectorsize = cpu_to_be32(512);
> +    header.size = cpu_to_be64(image_sectors * 512);
> +
> +    ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
> +    if (ret < 0) {
> +        return ret;
> +    }
> +    bdrv_close(bs);
> +
> +    BlockDriver *drv = bdrv_find_format("add-cow");
> +    assert(drv != NULL);
> +    ret = bdrv_open(bs, filename, BDRV_O_RDWR | BDRV_O_NO_FLUSH, drv);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    ret = bdrv_truncate(bs, ((image_sectors + 7) >> 3));
> +    if (ret < 0) {
> +        return ret;
> +    }

bs is leaked (also consider error cases with early returns).

> +    return ret;
> +}
> +
> +
> +static int add_cow_co_readv(BlockDriverState *bs, int64_t sector_num,
> +                         int remaining_sectors, QEMUIOVector *qiov)
> +{
> +    BDRVAddCowState *s = bs->opaque;
> +    int cur_nr_sectors;
> +    uint64_t bytes_done = 0;
> +    QEMUIOVector hd_qiov;
> +    int n, ret = 0;
> +
> +    qemu_iovec_init(&hd_qiov, qiov->niov);
> +    qemu_co_mutex_lock(&s->lock);
> +    while (remaining_sectors != 0) {
> +        cur_nr_sectors = remaining_sectors;
> +        if (add_cow_is_allocated(bs, sector_num, cur_nr_sectors, &n)) {
> +            cur_nr_sectors = n;
> +            qemu_iovec_reset(&hd_qiov);
> +            qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
> +                            cur_nr_sectors * 512);
> +            qemu_co_mutex_unlock(&s->lock);
> +            ret = bdrv_co_readv(s->image_hd, sector_num, n, &hd_qiov);
> +            qemu_co_mutex_lock(&s->lock);
> +            if (ret < 0) {
> +                goto fail;
> +            }
> +        } else {
> +            cur_nr_sectors = n;
> +            if (bs->backing_hd) {
> +                qemu_iovec_reset(&hd_qiov);
> +                qemu_iovec_copy(&hd_qiov, qiov, bytes_done,
> +                            cur_nr_sectors * 512);
> +                qemu_co_mutex_unlock(&s->lock);
> +                ret = bdrv_co_readv(bs->backing_hd, sector_num,
> +                                    n, &hd_qiov);
> +                qemu_co_mutex_lock(&s->lock);
> +                if (ret < 0) {
> +                    goto fail;
> +                }
> +            } else {
> +                qemu_iovec_memset(&hd_qiov, 0, 512 * cur_nr_sectors);

hd_qiov hasn't been updated here, it still is either empty or points to
old data.

> +            }
> +        }
> +        remaining_sectors -= cur_nr_sectors;
> +        sector_num += cur_nr_sectors;
> +        bytes_done += cur_nr_sectors * 512;
> +    }
> +fail:
> +    qemu_co_mutex_unlock(&s->lock);
> +    qemu_iovec_destroy(&hd_qiov);
> +    return ret;
> +}
> +
> +static int add_cow_co_writev(BlockDriverState *bs, int64_t sector_num,
> +                          int remaining_sectors, QEMUIOVector *qiov)
> +{
> +    BDRVAddCowState *s = bs->opaque;
> +    int ret;
> +    int cur_nr_sectors = remaining_sectors;
> +    QEMUIOVector hd_qiov;
> +    uint64_t bytes_done = 0;
> +    qemu_iovec_init(&hd_qiov, qiov->niov);
> +    qemu_co_mutex_lock(&s->lock);
> +
> +    qemu_iovec_reset(&hd_qiov);
> +    qemu_iovec_copy(&hd_qiov, qiov, bytes_done, cur_nr_sectors * 512);

bytes_done is always 0.

> +    qemu_co_mutex_unlock(&s->lock);
> +    ret = bdrv_co_writev(s->image_hd,
> +                     sector_num,
> +                     cur_nr_sectors, &hd_qiov);
> +    qemu_co_mutex_lock(&s->lock);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    qemu_co_mutex_unlock(&s->lock);
> +    add_cow_update_bitmap(bs, sector_num, remaining_sectors);
> +    ret = 0;
> +fail:
> +    qemu_iovec_destroy(&hd_qiov);
> +    return ret;
> +}

Can you explain the locking in add_cow_co_readv/writev? I can't see how
it protects anything.

> +
> +static int bdrv_add_cow_truncate(BlockDriverState *bs, int64_t offset)
> +{
> +    int ret;
> +    ret = bdrv_truncate(bs->file, offset + sizeof(AddCowHeader));

bs->file only contains metadata, so why is this correct? Shoudln't you
update the header with the new size and truncate s->image_hd?

> +    if (ret < 0) {
> +        return ret;
> +    }
> +
> +    return 0;
> +}
> +
> +static QEMUOptionParameter add_cow_create_options[] = {
> +    {
> +        .name = BLOCK_OPT_SIZE,
> +        .type = OPT_SIZE,
> +        .help = "Virtual disk size"
> +    },
> +    {
> +        .name = BLOCK_OPT_BACKING_FILE,
> +        .type = OPT_STRING,
> +        .help = "File name of a base image"
> +    },
> +    {
> +        .name = BLOCK_OPT_IMAGE_FILE,
> +        .type = OPT_STRING,
> +        .help = "File name of a image file"
> +    },
> +    { NULL }
> +};
> +
> +static int add_cow_flush(BlockDriverState *bs)
> +{
> +    BDRVAddCowState *state = bs->opaque;
> +    return bdrv_flush(bs->file) | bdrv_flush(state->image_hd);
> +}
> +
> +static BlockDriverAIOCB *add_cow_aio_flush(BlockDriverState *bs,
> +        BlockDriverCompletionFunc *cb, void *opaque)
> +{
> +    BDRVAddCowState *state = bs->opaque;
> +    bdrv_aio_flush(state->image_hd, cb, opaque);
> +    return bdrv_aio_flush(bs->file, cb, opaque);

This will invoke the callback twice, and opaque will probably be invalid
after the first callback. Not good.

This code is also lacking error handling for the ifrst bdrv_aio_flush.

Kevin



reply via email to

[Prev in Thread] Current Thread [Next in Thread]