qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v6] block:add-cow file format


From: Stefan Hajnoczi
Subject: Re: [Qemu-devel] [PATCH v6] block:add-cow file format
Date: Fri, 30 Dec 2011 14:09:04 +0000
User-agent: Mutt/1.5.21 (2010-09-15)

On Thu, Dec 29, 2011 at 05:36:59PM +0800, Dong Xu Wang wrote:

Some comments on everything but the I/O path, which I haven't reviewed
yet:

> diff --git a/block/add-cow.c b/block/add-cow.c
> new file mode 100644
> index 0000000..95af5b7
> --- /dev/null
> +++ b/block/add-cow.c
> @@ -0,0 +1,429 @@

Missing GPL or LGPL license header.

> +#include "qemu-common.h"
> +#include "block_int.h"
> +#include "module.h"
> +
> +#define ADD_COW_MAGIC       (((uint64_t)'A' << 56) | ((uint64_t)'D' << 48) | 
> \
> +                            ((uint64_t)'D' << 40) | ((uint64_t)'_' << 32) | \
> +                            ((uint64_t)'C' << 24) | ((uint64_t)'O' << 16) | \
> +                            ((uint64_t)'W' << 8) | 0xFF)
> +#define ADD_COW_VERSION     1
> +#define ADD_COW_FILE_LEN    1024
> +
> +typedef struct AddCowHeader {
> +    uint64_t        magic;
> +    uint32_t        version;
> +    char            backing_file[ADD_COW_FILE_LEN];
> +    char            image_file[ADD_COW_FILE_LEN];
> +    uint64_t        size;
> +    char            reserved[492];
> +} QEMU_PACKED AddCowHeader;
> +
> +typedef struct BDRVAddCowState {
> +    char                image_file[ADD_COW_FILE_LEN];

Why is this field needed?

> +    BlockDriverState    *image_hd;
> +    uint8_t             *bitmap;
> +    uint64_t            bitmap_size;
> +    CoMutex             lock;
> +} BDRVAddCowState;
> +
> +static int add_cow_probe(const uint8_t *buf, int buf_size, const char 
> *filename)
> +{
> +    const AddCowHeader *header = (const void *)buf;
> +
> +    if (be64_to_cpu(header->magic) == ADD_COW_MAGIC &&
> +        be32_to_cpu(header->version) == ADD_COW_VERSION) {
> +        return 100;
> +    } else {
> +        return 0;
> +    }
> +}
> +
> +static int add_cow_open(BlockDriverState *bs, int flags)
> +{
> +    AddCowHeader        header;
> +    int64_t             size;
> +    char                image_filename[ADD_COW_FILE_LEN];
> +    BlockDriver         *image_drv = NULL;
> +    int                 ret;
> +    BDRVAddCowState     *s = bs->opaque;
> +    BlockDriverState    *backing_bs = NULL;
> +
> +    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
> +    if (ret != sizeof(header)) {
> +        goto fail;
> +    }
> +
> +    if (be64_to_cpu(header.magic) != ADD_COW_MAGIC) {
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +    if (be32_to_cpu(header.version) != ADD_COW_VERSION) {
> +        char version[64];
> +        snprintf(version, sizeof(version), "ADD-COW version %d", 
> header.version);
> +        qerror_report(QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
> +            bs->device_name, "add-cow", version);
> +        ret = -ENOTSUP;
> +        goto fail;
> +    }
> +
> +    size = be64_to_cpu(header.size);
> +    bs->total_sectors = size / BDRV_SECTOR_SIZE;
> +
> +    QEMU_BUILD_BUG_ON(sizeof(bs->backing_file) != 
> sizeof(header.backing_file));
> +    QEMU_BUILD_BUG_ON(sizeof(s->image_file) != sizeof(header.image_file));
> +    pstrcpy(bs->backing_file, sizeof(bs->backing_file),
> +            header.backing_file);
> +    pstrcpy(s->image_file, sizeof(s->image_file),
> +            header.image_file);

This assumes that header.backing_file and header.image_file is
NUL-terminated.  If the file happened to have the magic number and
version but does not include '\0' bytes in the header.backing_file then
we may crash here when trying to read beyond the end of the header
struct.

Image format code should be robust.  Please use strncpy(3) carefully
instead of pstrcpy().

Also please update the file format specification to either make these
fields NUL-terminated or NUL-terminated unless the length is 1024
characters (in which case there is no NUL but the string still ends).

> +
> +    s->bitmap_size = ((bs->total_sectors + 7) >> 3);
> +    s->bitmap = qemu_blockalign(bs, s->bitmap_size);
> +
> +    ret = bdrv_pread(bs->file, sizeof(header), s->bitmap,
> +            s->bitmap_size);
> +    if (ret != s->bitmap_size) {
> +        goto fail;
> +    }
> +
> +    if (s->image_file[0] == '\0') {
> +        ret = -ENOENT;
> +        goto fail;
> +    }
> +
> +    ret = bdrv_file_open(&backing_bs, bs->backing_file, 0);
> +    if (ret < 0) {
> +        return ret;
> +    }
> +    bdrv_delete(backing_bs);

What does this do?  (It leaks s->bitmap when it fails.)

> +
> +    s->image_hd = bdrv_new("");
> +
> +    if (path_has_protocol(s->image_file)) {
> +        pstrcpy(image_filename, sizeof(image_filename),
> +                s->image_file);
> +    } else {
> +        path_combine(image_filename, sizeof(image_filename),
> +                     bs->filename, s->image_file);
> +    }
> +
> +    image_drv = bdrv_find_format("raw");
> +    ret = bdrv_open(s->image_hd, image_filename, flags, image_drv);
> +    if (ret < 0) {
> +        bdrv_delete(s->image_hd);
> +        s->image_hd = NULL;
> +        goto fail;
> +    }

TODO

> +
> +    qemu_co_mutex_init(&s->lock);
> +    return 0;
> + fail:
> +    g_free(s->bitmap);
> +    return ret;
> +}
> +
> +static inline void add_cow_set_bit(BlockDriverState *bs, int64_t bitnum)
> +{
> +    uint64_t offset = bitnum >> 3;
> +    BDRVAddCowState *s = bs->opaque;
> +    s->bitmap[offset] |= (1 << (bitnum % 8));
> +}
> +
> +static inline int is_bit_set(BlockDriverState *bs, int64_t bitnum)
> +{
> +    BDRVAddCowState *s = bs->opaque;
> +    uint64_t offset = bitnum >> 3;
> +    return !!(s->bitmap[offset] & (1 << (bitnum % 8)));
> +}

Please use bool instead of int.  Then you can also drop the !!.

> +
> +static coroutine_fn int add_cow_is_allocated(BlockDriverState *bs,
> +        int64_t sector_num, int nb_sectors, int *num_same)
> +{
> +    int changed;
> +    BDRVAddCowState *s = bs->opaque;
> +
> +    /* Beyond the end of bitmap, return error or read from backing_file? */
> +    if (((sector_num + nb_sectors + 7) >> 3) > s->bitmap_size) {
> +        return 0;
> +    }

If the bitmap size is tied to bs->total_sectors then you do not need
this check since bdrv_co_is_allocated() already handles this case (and
sets *num_same to 0).

> +
> +    if (nb_sectors == 0) {
> +        *num_same = nb_sectors;
> +        return 0;
> +    }
> +
> +    changed = is_bit_set(bs, sector_num);
> +    for (*num_same = 1; *num_same < nb_sectors; (*num_same)++) {
> +        if (is_bit_set(bs, sector_num + *num_same) != changed) {
> +            break;
> +        }
> +    }
> +
> +    return changed;
> +}
> +
> +static int add_cow_update_bitmap(BlockDriverState *bs, int64_t sector_num,
> +        int nb_sectors)
> +{
> +    int i, ret = 0;
> +    bool changed = false;
> +    BDRVAddCowState *s = bs->opaque;
> +    uint64_t start_pos = (sector_num  >> 3) & (~511);
> +    uint64_t end_pos = (((sector_num + nb_sectors - 1)  >> 3) + 511) & 
> (~511);
> +    uint64_t sectors_to_write = MIN(end_pos - start_pos,
> +                s->bitmap_size - start_pos);
> +
> +    if (start_pos > s->bitmap_size) {
> +        return 0;
> +    }
> +
> +    for (i = 0; i < nb_sectors; i++) {
> +        if (!changed) {
> +            changed = true;
> +        }
> +        add_cow_set_bit(bs, sector_num + i);
> +    }
> +
> +    if (changed) {
> +        ret = bdrv_pwrite(bs->file, sizeof(AddCowHeader) + start_pos,
> +            s->bitmap + start_pos, sectors_to_write);
> +    }
> +    return ret;
> +}
> +
> +static void add_cow_close(BlockDriverState *bs)
> +{
> +    BDRVAddCowState *s = bs->opaque;
> +    g_free(s->bitmap);

We also need to free s->image_hd.

> diff --git a/docs/specs/add-cow.txt b/docs/specs/add-cow.txt
> new file mode 100644
> index 0000000..33f358e
> --- /dev/null
> +++ b/docs/specs/add-cow.txt
> @@ -0,0 +1,72 @@
> +== General ==
> +
> +Raw file format does not support backing_file and copy on write feature. Then
> +you can use add-cow file to implement these features.

I suggest replacing the second sentence with these:

"The add-cow image format makes it possible to use backing files with raw
image by keeping a separate .add-cow metadata file.  Once all sectors
have been written to in the raw image it is safe to discard the .add-cow
and backing files and instead use the raw image directly."

They describe what add-cow does and how it can be used in a little more
detail.

> +
> +When using add-cow, procedures may like this:
> +(ubuntu.img is a disk image which has been installed OS.)
> +    1)  Create a raw image with the same size of ubuntu.img
> +            qemu-img create -f raw test.raw 8G
> +    2)  Create a add-cow image which will store dirty bitmap
> +            qemu-img create -f add-cow test.add-cow -o 
> backing_file=ubuntu.img,image_file=test.raw
> +    3)  Run qemu with add-cow image
> +            qemu -drive if=virtio,file=test.add-cow
> +
> +While QEMU is running, virtual size of image_file and backing_file must be 
> the
> +same. So if image_file does not have the same virtual size as backing_file's 
> in
> +step 2), qemu-img will truncate it.

This limitation is unnecessary.  QCOW2 and QED will read zeroes if the
image is larger than the backing file.  Please implement the same
behavior so that add-cow behaves consistently with how users know QCOW2
and QED work.

> +
> +=Specification=
> +
> +The file format looks like this:
> +
> + +---------------+--------------------------+
> + |     Header    |           Data           |
> + +---------------+--------------------------+
> +
> +All numbers in add-cow are stored in Big Endian byte order.

New file formats and protocols often choose little endian because x86
and arm are very popular nowadays.  It's an arbitrary decision but I
just wanted to suggest it.

> +== Header ==
> +
> +The Header is included in the first bytes:
> +
> +    Byte  0 -  7:       magic
> +                        add-cow magic string ("ADD_COW\xff")
> +
> +          8 -  11:      version
> +                        Version number (only valid value is 1 now)
> +
> +          12 - 1035:    backing_file
> +                        backing_file file name related to add-cow file. While
> +                        using backing_file, must together with image_file. 
> All
> +                        unused bytes are padded with zeros.

"While using backing_file, must together with image_file"

What does this mean?

> +
> +         1036 - 2059:   image_file
> +                        image_file is a raw file, While using image_file, 
> must
> +                        together with image_file. All unused bytes are padded

"While using image_file, must together with image_file"

What does this mean?

> +                        with zeros.
> +
> +         2060 - 2067:   size
> +                        Virtual disk size of image_file in bytes.

Why is a field required to store the virtual disk size?  The image_file
size should always tell us the virtual disk image.

> +
> +         2068 - 2559:   The Reserved field is used to make sure Data field 
> starts
> +                        at the multiple of 512, not used currently. All 
> bytes are
> +                        filled with 0.
> +
> +== Data ==
> +
> +The Data field starts at the 2560th byte, stores a bitmap related to 
> backing_file
> +and image_file. The bitmap will track whether the sector in backing_file is 
> dirty
> +or not.
> +
> +
> +Each bit in the bitmap indicates one sector's status. So the size of bitmap 
> is
> +calculated according to virtual size of backing_file. In each byte, bit 0 to 
> 7
> +will track the 1st to 7th sector in sequence, bit orders in one byte look 
> like:
> + +----+----+----+----+----+----+----+----+
> + | b7 | b6 | b5 | b4 | b3 | b2 | b1 | b0 |
> + +----+----+----+----+----+----+----+----+
> +
> +If the bit is 0, indicates the sector has not been allocated in image_file, 
> data
> +should be loaded from backing_file while reading; if the bit is 1,  
> indicates the
> +related sector has been dirty, should be loaded from image_file while 
> reading.

Perhaps also add the following so we define both read and write
semantics:

Writing to a sector causes the corresponding bit to be set to 1.



reply via email to

[Prev in Thread] Current Thread [Next in Thread]