qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 3/5] block: initial VHDX driver support frame


From: Kevin Wolf
Subject: Re: [Qemu-devel] [PATCH v2 3/5] block: initial VHDX driver support framework - supports open and probe
Date: Thu, 25 Apr 2013 15:04:23 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

Am 23.04.2013 um 16:24 hat Jeff Cody geschrieben:
> This is the initial block driver framework for VHDX image support (
> i.e. Hyper-V image file formats), that supports opening VHDX files, and
> parsing the headers.
> 
> This commit does not yet enable:
>     - reading
>     - writing
>     - updating the header
>     - differencing files (images with parents)
>     - log replay / dirty logs (only clean images)
> 
> This is based on Microsoft's VHDX specification:
>     "VHDX Format Specification v0.95", published 4/12/2012
>     https://www.microsoft.com/en-us/download/details.aspx?id=29681
> 
> Signed-off-by: Jeff Cody <address@hidden>

Okay, now I'm starting to actually compare the spec to your
implementation. Comments inline.

>  block/Makefile.objs |   1 +
>  block/vhdx.c        | 769 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/vhdx.h        |  25 ++
>  3 files changed, 795 insertions(+)
>  create mode 100644 block/vhdx.c
> 
> diff --git a/block/Makefile.objs b/block/Makefile.objs
> index 6c4b5bc..5f0358a 100644
> --- a/block/Makefile.objs
> +++ b/block/Makefile.objs
> @@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o 
> bochs.o vpc.o vvfat
>  block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
> qcow2-cache.o
>  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
>  block-obj-y += qed-check.o
> +block-obj-y += vhdx.o
>  block-obj-y += parallels.o blkdebug.o blkverify.o
>  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
>  block-obj-$(CONFIG_POSIX) += raw-posix.o
> diff --git a/block/vhdx.c b/block/vhdx.c
> new file mode 100644
> index 0000000..b0ea2ba
> --- /dev/null
> +++ b/block/vhdx.c
> @@ -0,0 +1,769 @@
> +/*
> + * Block driver for Hyper-V VHDX Images
> + *
> + * Copyright (c) 2013 Red Hat, Inc.,
> + *
> + * Authors:
> + *  Jeff Cody <address@hidden>
> + *
> + *  This is based on the "VHDX Format Specification v0.95", published 
> 4/12/2012
> + *  by Microsoft:
> + *      https://www.microsoft.com/en-us/download/details.aspx?id=29681
> + *
> + * This work is licensed under the terms of the GNU LGPL, version 2 or later.
> + * See the COPYING.LIB file in the top-level directory.
> + *
> + */
> +
> +#include "qemu-common.h"
> +#include "block/block_int.h"
> +#include "qemu/module.h"
> +#include "qemu/crc32c.h"
> +#include "block/vhdx.h"
> +
> +
> +/* Several metadata and region table data entries are identified by
> + * guids in  a MS-specific GUID format. */
> +
> +
> +/* ------- Known Region Table GUIDs ---------------------- */
> +static const ms_guid bat_guid =      { .data1 = 0x2dc27766,
> +                                       .data2 = 0xf623,
> +                                       .data3 = 0x4200,
> +                                       .data4 = { 0x9d, 0x64, 0x11, 0x5e,
> +                                                  0x9b, 0xfd, 0x4a, 0x08} };
> +
> +static const ms_guid metadata_guid = { .data1 = 0x8b7ca206,
> +                                       .data2 = 0x4790,
> +                                       .data3 = 0x4b9a,
> +                                       .data4 = { 0xb8, 0xfe, 0x57, 0x5f,
> +                                                  0x05, 0x0f, 0x88, 0x6e} };
> +
> +
> +
> +/* ------- Known Metadata Entry GUIDs ---------------------- */
> +static const ms_guid file_param_guid =   { .data1 = 0xcaa16737,
> +                                           .data2 = 0xfa36,
> +                                           .data3 = 0x4d43,
> +                                           .data4 = { 0xb3, 0xb6, 0x33, 0xf0,
> +                                                      0xaa, 0x44, 0xe7, 
> 0x6b} };
> +
> +static const ms_guid virtual_size_guid = { .data1 = 0x2FA54224,
> +                                           .data2 = 0xcd1b,
> +                                           .data3 = 0x4876,
> +                                           .data4 = { 0xb2, 0x11, 0x5d, 0xbe,
> +                                                      0xd8, 0x3b, 0xf4, 
> 0xb8} };
> +
> +static const ms_guid page83_guid =       { .data1 = 0xbeca12ab,
> +                                           .data2 = 0xb2e6,
> +                                           .data3 = 0x4523,
> +                                           .data4 = { 0x93, 0xef, 0xc3, 0x09,
> +                                                      0xe0, 0x00, 0xc7, 
> 0x46} };
> +
> +
> +static const ms_guid phys_sector_guid =  { .data1 = 0xcda348c7,
> +                                           .data2 = 0x445d,
> +                                           .data3 = 0x4471,
> +                                           .data4 = { 0x9c, 0xc9, 0xe9, 0x88,
> +                                                      0x52, 0x51, 0xc5, 
> 0x56} };
> +
> +static const ms_guid parent_locator_guid = { .data1 = 0xa8d35f2d,
> +                                             .data2 = 0xb30b,
> +                                             .data3 = 0x454d,
> +                                             .data4 = { 0xab, 0xf7, 0xd3,
> +                                                        0xd8, 0x48, 0x34,
> +                                                        0xab, 0x0c} };
> +
> +static const ms_guid logical_sector_guid = { .data1 = 0x8141bf1d,
> +                                             .data2 = 0xa96f,
> +                                             .data3 = 0x4709,
> +                                             .data4 = { 0xba, 0x47, 0xf2,
> +                                                        0x33, 0xa8, 0xfa,
> +                                                        0xab, 0x5f} };
> +
> +/* Each parent type must have a valid GUID; this is for parent images
> + * of type 'VHDX'.  If we were to allow e.g. a QCOW2 parent, we would
> + * need to make up our own QCOW2 GUID type */
> +static const ms_guid parent_vhdx_guid = { .data1 = 0xb04aefb7,
> +                                          .data2 = 0xd19e,
> +                                          .data3 = 0x4a81,
> +                                          .data4 = { 0xb7, 0x89, 0x25, 0xb8,
> +                                                     0xe9, 0x44, 0x59, 0x13} 
> };
> +
> +
> +#define META_FILE_PARAMETER_PRESENT      0x01
> +#define META_VIRTUAL_DISK_SIZE_PRESENT   0x02
> +#define META_PAGE_83_PRESENT             0x04
> +#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
> +#define META_PHYS_SECTOR_SIZE_PRESENT    0x10
> +#define META_PARENT_LOCATOR_PRESENT      0x20
> +
> +#define META_ALL_PRESENT    \
> +    (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \
> +     META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
> +     META_PHYS_SECTOR_SIZE_PRESENT)
> +
> +typedef struct vhdx_metadata_entries {
> +    vhdx_metadata_table_entry file_parameters_entry;
> +    vhdx_metadata_table_entry virtual_disk_size_entry;
> +    vhdx_metadata_table_entry page83_data_entry;
> +    vhdx_metadata_table_entry logical_sector_size_entry;
> +    vhdx_metadata_table_entry phys_sector_size_entry;
> +    vhdx_metadata_table_entry parent_locator_entry;
> +    uint16_t present;
> +} vhdx_metadata_entries;
> +
> +
> +typedef struct BDRVVHDXState {
> +    CoMutex lock;
> +
> +    int curr_header;
> +    vhdx_header *headers[2];
> +
> +    vhdx_region_table_header rt;
> +    vhdx_region_table_entry bat_rt;         /* region table for the BAT */
> +    vhdx_region_table_entry metadata_rt;    /* region table for the metadata 
> */
> +
> +    vhdx_metadata_table_header  metadata_hdr;
> +    vhdx_metadata_entries metadata_entries;
> +
> +    vhdx_file_parameters params;
> +    uint32_t block_size;
> +    uint32_t block_size_bits;
> +    uint32_t sectors_per_block;
> +    uint32_t sectors_per_block_bits;
> +
> +    uint64_t virtual_disk_size;
> +    uint32_t logical_sector_size;
> +    uint32_t physical_sector_size;
> +
> +    uint64_t chunk_ratio;
> +    uint32_t chunk_ratio_bits;
> +    uint32_t logical_sector_size_bits;
> +
> +    uint32_t bat_entries;
> +    vhdx_bat_entry *bat;
> +    uint64_t bat_offset;
> +
> +    vhdx_parent_locator_header parent_header;
> +    vhdx_parent_locator_entry *parent_entries;
> +
> +} BDRVVHDXState;
> +
> +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
> +                            int crc_offset)
> +{
> +    uint32_t crc_new;
> +    uint32_t crc_orig;
> +    assert(buf != NULL);
> +
> +    if (crc_offset > 0) {
> +        memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig));
> +        memset(buf+crc_offset, 0, sizeof(crc_orig));
> +    }
> +
> +    crc_new = crc32c(crc, buf, size);
> +    if (crc_offset > 0) {
> +        memcpy(buf+crc_offset, &crc_orig, sizeof(crc_orig));
> +    }
> +
> +    return crc_new;
> +}
> +
> +/* Validates the checksum of the buffer, with an in-place CRC.
> + *
> + * Zero is substituted during crc calculation for the original crc field,
> + * and the crc field is restored afterwards.  But the buffer will be modifed
> + * during the calculation, so this may not be not suitable for multi-threaded
> + * use.
> + *
> + * crc_offset: byte offset in buf of the buffer crc
> + * buf: buffer pointer
> + * size: size of buffer (must be > crc_offset+4)
> + *
> + * returns true if checksum is valid, false otherwise
> + */
> +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
> +{
> +    uint32_t crc_orig;
> +    uint32_t crc;
> +
> +    assert(buf != NULL);
> +    assert(size > (crc_offset+4));
> +
> +    memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig));
> +    crc_orig = le32_to_cpu(crc_orig);
> +
> +    crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset);
> +
> +    return crc == crc_orig;
> +}
> +
> +
> +/*
> + * Per the MS VHDX Specification, for every VHDX file:
> + *      - The header section is fixed size - 1 MB
> + *      - The header section is always the first "object"
> + *      - The first 64KB of the header is the File Identifier
> + *      - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
> + *      - The following 512 bytes constitute a UTF-16 string identifiying the
> + *        software that created the file, and is optional and diagnostic 
> only.
> + *
> + *  Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
> + */
> +static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
> +{
> +    if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {
> +        return 100;
> +    }
> +    return 0;
> +}
> +
> +/* All VHDX structures on disk are little endian */
> +static void vhdx_header_le_import(vhdx_header *h)
> +{
> +    assert(h != NULL);
> +
> +    le32_to_cpus(&h->signature);
> +    le32_to_cpus(&h->checksum);
> +    le64_to_cpus(&h->sequence_number);
> +
> +    leguid_to_cpus(&h->file_write_guid);
> +    leguid_to_cpus(&h->data_write_guid);
> +    leguid_to_cpus(&h->log_guid);
> +
> +    le16_to_cpus(&h->log_version);
> +    le16_to_cpus(&h->version);
> +    le32_to_cpus(&h->log_length);
> +    le64_to_cpus(&h->log_offset);
> +}
> +
> +
> +/* opens the specified header block from the VHDX file header section */
> +static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
> +{
> +    int ret = 0;
> +    vhdx_header *header1;
> +    vhdx_header *header2;
> +    uint64_t h1_seq = 0;
> +    uint64_t h2_seq = 0;
> +    uint8_t *buffer;

Makes sense to use uint8_t* (or possible rather void*) here, but it
means that struct vhdx_header_padded is unused and could be dropped from
the header file.

> +    header1 = qemu_blockalign(bs, sizeof(vhdx_header));
> +    header2 = qemu_blockalign(bs, sizeof(vhdx_header));
> +
> +    buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
> +
> +    s->headers[0] = header1;
> +    s->headers[1] = header2;

The spec requires that the signature field of the File Type Identifier
must be validated when loading a VHDX file. We only have the check in
vhdx_probe(), but not in vhdx_open().

(This means that our struct vhdx_file_identifier is unused.)

> +    /* We have to read the whole VHDX_HEADER_SIZE instead of
> +     * sizeof(vhdx_header), because the checksum is over the whole
> +     * region */
> +    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, 
> VHDX_HEADER_SIZE);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +    /* copy over just the relevant portion that we need */
> +    memcpy(header1, buffer, sizeof(vhdx_header));
> +    vhdx_header_le_import(header1);
> +
> +    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
> +        header1->signature == VHDX_HDR_MAGIC) {
> +        h1_seq = header1->sequence_number;
> +    }
> +
> +    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, 
> VHDX_HEADER_SIZE);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +    /* copy over just the relevant portion that we need */
> +    memcpy(header2, buffer, sizeof(vhdx_header));
> +    vhdx_header_le_import(header2);
> +
> +    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
> +        header2->signature == VHDX_HDR_MAGIC) {
> +        h2_seq = header2->sequence_number;
> +    }
> +
> +    if (h1_seq > h2_seq) {
> +        s->curr_header = 0;
> +    } else if (h2_seq > h1_seq) {
> +        s->curr_header = 1;
> +    } else {
> +        printf("NO VALID HEADER\n");
> +        ret = -EINVAL;
> +        goto fail;
> +    }

This may be nit-picking, but according to the spec, I think a header
with a sequence number of 0 can be valid (if the other header is
invalid). This code would get 0 for both and error out.

What does a new VHDX file look like in practice? Sequence numbers 1
and 2?

> +
> +    ret = 0;
> +
> +    goto exit;
> +
> +fail:
> +    qemu_vfree(header1);
> +    qemu_vfree(header2);
> +    s->headers[0] = NULL;
> +    s->headers[1] = NULL;
> +exit:
> +    qemu_vfree(buffer);
> +    return ret;
> +}
> +
> +
> +static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
> +{
> +    int ret = 0;
> +    uint8_t *buffer;
> +    int offset = 0;
> +    vhdx_region_table_entry rt_entry;
> +    int i;
> +
> +    /* We have to read the whole 64KB block, because the crc32 is over the
> +     * whole block */
> +    buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
> +
> +    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
> +                    VHDX_HEADER_BLOCK_SIZE);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +    memcpy(&s->rt, buffer, sizeof(s->rt));
> +    le32_to_cpus(&s->rt.signature);
> +    le32_to_cpus(&s->rt.checksum);
> +    le32_to_cpus(&s->rt.entry_count);
> +    le32_to_cpus(&s->rt.reserved);
> +    offset += sizeof(s->rt);

Maybe it would safer with respect to forgotting endianness conversion if
we didn't copy and then convert fields, but only copy converted values.
Like:

vhdx_region_table_header *rt = buffer;
s->rt = (vhdx_region_table_header) {
    .signature  = le32_to_cpu(rt->signature),
    .checksum   = le32_to_cpu(rt->checksum),
    ...
};

Matter of taste, I guess, so this is just a suggestion, not a request.
You could do this is quite a few places, I think.

> +
> +    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
> +        s->rt.signature != VHDX_RT_MAGIC) {
> +        ret = -EINVAL;
> +        goto fail;
> +    }
> +
> +    for (i = 0; i < s->rt.entry_count; i++) {
> +        memcpy(&rt_entry, buffer+offset, sizeof(rt_entry));
> +        offset += sizeof(rt_entry);
> +
> +        leguid_to_cpus(&rt_entry.guid);
> +        le64_to_cpus(&rt_entry.file_offset);
> +        le32_to_cpus(&rt_entry.length);
> +        le32_to_cpus(&rt_entry.data_bits);
> +
> +        /* see if we recognize the entry */
> +        if (guid_eq(rt_entry.guid, bat_guid)) {
> +            s->bat_rt = rt_entry;
> +            continue;
> +        }
> +
> +        if (guid_eq(rt_entry.guid, metadata_guid)) {
> +            s->metadata_rt = rt_entry;
> +            continue;
> +        }

If the same regions occurs multiple times, the latest wins. Such images
aren't valid, but what should we do with such cases? Is this good enough
or should we detect it?

> +        if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
> +            /* cannot read vhdx file - required region table entry that
> +             * we do not understand.  per spec, we must fail to open */
> +            ret = -ENOTSUP;
> +            goto fail;
> +        }
> +    }
> +    ret = 0;
> +
> +fail:
> +    qemu_vfree(buffer);
> +    return ret;
> +}
> +
> +
> +
> +/* Metadata initial parser
> + *
> + * This loads all the metadata entry fields.  This may cause additional
> + * fields to be processed (e.g. parent locator, etc..).
> + *
> + * There are 5 Metadata items that are always required:
> + *      - File Parameters (block size, has a parent)
> + *      - Virtual Disk Size (size, in bytes, of the virtual drive)
> + *      - Page 83 Data (scsi page 83 guid)
> + *      - Logical Sector Size (logical sector size in bytes, either 512 or
> + *                             4096.  We only support 512 currently)
> + *      - Physical Sector Size (512 or 4096)
> + *
> + * Also, if the File Parameters indicate this is a differencing file,
> + * we must also look for the Parent Locator metadata item.
> + */
> +static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
> +{
> +    int ret = 0;
> +    uint8_t *buffer;
> +    int offset = 0;
> +    int i = 0;
> +    uint32_t block_size, sectors_per_block, logical_sector_size;
> +    uint64_t chunk_ratio;
> +    vhdx_metadata_table_entry md_entry;
> +
> +    buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
> +
> +    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
> +                     VHDX_METADATA_TABLE_MAX_SIZE);
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
> +    offset += sizeof(s->metadata_hdr);
> +
> +    le64_to_cpus(&s->metadata_hdr.signature);
> +    le16_to_cpus(&s->metadata_hdr.reserved);
> +    le16_to_cpus(&s->metadata_hdr.entry_count);
> +
> +    if (s->metadata_hdr.signature != VHDX_METADATA_MAGIC) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    s->metadata_entries.present = 0;
> +
> +    if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
> +        (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    for (i = 0; i < s->metadata_hdr.entry_count; i++) {
> +        memcpy(&md_entry, buffer+offset, sizeof(md_entry));
> +        offset += sizeof(md_entry);
> +
> +        leguid_to_cpus(&md_entry.item_id);
> +        le32_to_cpus(&md_entry.offset);
> +        le32_to_cpus(&md_entry.length);
> +        le32_to_cpus(&md_entry.data_bits);
> +        le32_to_cpus(&md_entry.reserved2);
> +
> +        if (guid_eq(md_entry.item_id, file_param_guid)) {
> +            s->metadata_entries.file_parameters_entry = md_entry;
> +            s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, virtual_size_guid)) {
> +            s->metadata_entries.virtual_disk_size_entry = md_entry;
> +            s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, page83_guid)) {
> +            s->metadata_entries.page83_data_entry = md_entry;
> +            s->metadata_entries.present |= META_PAGE_83_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, logical_sector_guid)) {
> +            s->metadata_entries.logical_sector_size_entry = md_entry;
> +            s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, phys_sector_guid)) {
> +            s->metadata_entries.phys_sector_size_entry = md_entry;
> +            s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
> +            continue;
> +        }
> +
> +        if (guid_eq(md_entry.item_id, parent_locator_guid)) {
> +            s->metadata_entries.parent_locator_entry = md_entry;
> +            s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
> +            continue;
> +        }
> +
> +        if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
> +            /* cannot read vhdx file - required region table entry that
> +             * we do not understand.  per spec, we must fail to open */
> +            ret = -ENOTSUP;
> +            goto exit;
> +        }
> +    }
> +
> +    if (s->metadata_entries.present != META_ALL_PRESENT) {
> +        ret = -ENOTSUP;
> +        goto exit;
> +    }
> +
> +    ret = bdrv_pread(bs->file,
> +                     s->metadata_entries.file_parameters_entry.offset
> +                                         + s->metadata_rt.file_offset,
> +                     &s->params,
> +                     sizeof(s->params));
> +
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    le32_to_cpus(&s->params.block_size);
> +    le32_to_cpus(&s->params.data_bits);
> +
> +
> +    /* We now have the file parameters, so we can tell if this is a
> +     * differencing file (i.e.. has_parent), is dynamic or fixed
> +     * sized (leave_blocks_allocated), and the block size */
> +
> +    /* The parent locator required iff the file parameters has_parent set */
> +    if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
> +        if (s->metadata_entries.present & ~META_PARENT_LOCATOR_PRESENT) {

Not sure what you're trying to achieve here. We get -ENOTSUP if any
metadata entry except the parent locator is present, and -EINVAL if
there is none?

Of course, this doesn't matter currently, because above we've verified
that s->metadata_entries.present == META_ALL_PRESENT, so we'll always
get the -ENOTSUP case.

> +            /* TODO: parse  parent locator fields */
> +            ret = -ENOTSUP; /* temp, until differencing files are supported 
> */
> +            goto exit;
> +        } else {
> +            /* if has_parent is set, but there is not parent locator present,
> +             * then that is an invalid combination */
> +            ret = -EINVAL;
> +            goto exit;
> +        }
> +    }
> +
> +    /* determine virtual disk size, logical sector size,
> +     * and phys sector size */
> +
> +    ret = bdrv_pread(bs->file,
> +                     s->metadata_entries.virtual_disk_size_entry.offset
> +                                           + s->metadata_rt.file_offset,
> +                     &s->virtual_disk_size,
> +                     sizeof(uint64_t));
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    ret = bdrv_pread(bs->file,
> +                     s->metadata_entries.logical_sector_size_entry.offset
> +                                             + s->metadata_rt.file_offset,
> +                     &s->logical_sector_size,
> +                     sizeof(uint32_t));
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +    ret = bdrv_pread(bs->file,
> +                     s->metadata_entries.phys_sector_size_entry.offset
> +                                          + s->metadata_rt.file_offset,
> +                     &s->physical_sector_size,
> +                     sizeof(uint32_t));
> +    if (ret < 0) {
> +        goto exit;
> +    }
> +
> +    le64_to_cpus(&s->virtual_disk_size);
> +    le32_to_cpus(&s->logical_sector_size);
> +    le32_to_cpus(&s->physical_sector_size);
> +
> +    if (s->logical_sector_size == 0 || s->params.block_size == 0) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    /* both block_size and sector_size are guaranteed powers of 2 */
> +    s->sectors_per_block = s->params.block_size / s->logical_sector_size;
> +    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
> +                     (uint64_t)s->logical_sector_size /
> +                     (uint64_t)s->params.block_size;
> +
> +    /* These values are ones we will want to use for division / 
> multiplication
> +     * later on, and they are all guaranteed (per the spec) to be powers of 
> 2,
> +     * so we can take advantage of that for shift operations during
> +     * reads/writes */
> +    logical_sector_size = s->logical_sector_size;
> +    if (logical_sector_size & (logical_sector_size - 1)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    sectors_per_block = s->sectors_per_block;
> +    if (sectors_per_block & (sectors_per_block - 1)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +    chunk_ratio = s->chunk_ratio;
> +    if (chunk_ratio & (chunk_ratio - 1)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +    block_size = s->params.block_size;
> +    if (block_size & (block_size - 1)) {
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
> +    while (logical_sector_size >>= 1) {
> +        s->logical_sector_size_bits++;
> +    }
> +    while (sectors_per_block >>= 1) {
> +        s->sectors_per_block_bits++;
> +    }
> +    while (chunk_ratio >>= 1) {
> +        s->chunk_ratio_bits++;
> +    }
> +    while (block_size >>= 1) {
> +        s->block_size_bits++;
> +    }
> +
> +    if (s->logical_sector_size != BDRV_SECTOR_SIZE) {
> +        printf("VHDX error - QEMU only supports 512 byte sector sizes\n");

This is not true. It depends on the device model, and in particular the
qdev properties of it. The block layer doesn't have access to them, so
checking that it matches the image file isn't trivial. It would have to
be the device model that checks that the BlockDriverState is valid for
the device it emulates.

I suggest dropping this check; otherwise make it (q)error_report at
least.

> +        ret = -ENOTSUP;
> +        goto exit;
> +    }
> +
> +    ret = 0;
> +
> +exit:
> +    qemu_vfree(buffer);
> +    return ret;
> +}

Kevin



reply via email to

[Prev in Thread] Current Thread [Next in Thread]