qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 3/5] block: initial VHDX driver support frame


From: Jeff Cody
Subject: Re: [Qemu-devel] [PATCH v2 3/5] block: initial VHDX driver support framework - supports open and probe
Date: Thu, 25 Apr 2013 11:03:38 -0400
User-agent: Mutt/1.5.21 (2010-09-15)

On Thu, Apr 25, 2013 at 03:04:23PM +0200, Kevin Wolf wrote:
> Am 23.04.2013 um 16:24 hat Jeff Cody geschrieben:
> > This is the initial block driver framework for VHDX image support (
> > i.e. Hyper-V image file formats), that supports opening VHDX files, and
> > parsing the headers.
> > 
> > This commit does not yet enable:
> >     - reading
> >     - writing
> >     - updating the header
> >     - differencing files (images with parents)
> >     - log replay / dirty logs (only clean images)
> > 
> > This is based on Microsoft's VHDX specification:
> >     "VHDX Format Specification v0.95", published 4/12/2012
> >     https://www.microsoft.com/en-us/download/details.aspx?id=29681
> > 
> > Signed-off-by: Jeff Cody <address@hidden>
> 
> Okay, now I'm starting to actually compare the spec to your
> implementation. Comments inline.
> 
> >  block/Makefile.objs |   1 +
> >  block/vhdx.c        | 769 
> > ++++++++++++++++++++++++++++++++++++++++++++++++++++
> >  block/vhdx.h        |  25 ++
> >  3 files changed, 795 insertions(+)
> >  create mode 100644 block/vhdx.c
> > 
> > diff --git a/block/Makefile.objs b/block/Makefile.objs
> > index 6c4b5bc..5f0358a 100644
> > --- a/block/Makefile.objs
> > +++ b/block/Makefile.objs
> > @@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
> > dmg.o bochs.o vpc.o vvfat
> >  block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
> > qcow2-cache.o
> >  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
> >  block-obj-y += qed-check.o
> > +block-obj-y += vhdx.o
> >  block-obj-y += parallels.o blkdebug.o blkverify.o
> >  block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
> >  block-obj-$(CONFIG_POSIX) += raw-posix.o
> > diff --git a/block/vhdx.c b/block/vhdx.c
> > new file mode 100644
> > index 0000000..b0ea2ba
> > --- /dev/null
> > +++ b/block/vhdx.c
> > @@ -0,0 +1,769 @@
> > +/*
> > + * Block driver for Hyper-V VHDX Images
> > + *
> > + * Copyright (c) 2013 Red Hat, Inc.,
> > + *
> > + * Authors:
> > + *  Jeff Cody <address@hidden>
> > + *
> > + *  This is based on the "VHDX Format Specification v0.95", published 
> > 4/12/2012
> > + *  by Microsoft:
> > + *      https://www.microsoft.com/en-us/download/details.aspx?id=29681
> > + *
> > + * This work is licensed under the terms of the GNU LGPL, version 2 or 
> > later.
> > + * See the COPYING.LIB file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu-common.h"
> > +#include "block/block_int.h"
> > +#include "qemu/module.h"
> > +#include "qemu/crc32c.h"
> > +#include "block/vhdx.h"
> > +
> > +
> > +/* Several metadata and region table data entries are identified by
> > + * guids in  a MS-specific GUID format. */
> > +
> > +
> > +/* ------- Known Region Table GUIDs ---------------------- */
> > +static const ms_guid bat_guid =      { .data1 = 0x2dc27766,
> > +                                       .data2 = 0xf623,
> > +                                       .data3 = 0x4200,
> > +                                       .data4 = { 0x9d, 0x64, 0x11, 0x5e,
> > +                                                  0x9b, 0xfd, 0x4a, 0x08} 
> > };
> > +
> > +static const ms_guid metadata_guid = { .data1 = 0x8b7ca206,
> > +                                       .data2 = 0x4790,
> > +                                       .data3 = 0x4b9a,
> > +                                       .data4 = { 0xb8, 0xfe, 0x57, 0x5f,
> > +                                                  0x05, 0x0f, 0x88, 0x6e} 
> > };
> > +
> > +
> > +
> > +/* ------- Known Metadata Entry GUIDs ---------------------- */
> > +static const ms_guid file_param_guid =   { .data1 = 0xcaa16737,
> > +                                           .data2 = 0xfa36,
> > +                                           .data3 = 0x4d43,
> > +                                           .data4 = { 0xb3, 0xb6, 0x33, 
> > 0xf0,
> > +                                                      0xaa, 0x44, 0xe7, 
> > 0x6b} };
> > +
> > +static const ms_guid virtual_size_guid = { .data1 = 0x2FA54224,
> > +                                           .data2 = 0xcd1b,
> > +                                           .data3 = 0x4876,
> > +                                           .data4 = { 0xb2, 0x11, 0x5d, 
> > 0xbe,
> > +                                                      0xd8, 0x3b, 0xf4, 
> > 0xb8} };
> > +
> > +static const ms_guid page83_guid =       { .data1 = 0xbeca12ab,
> > +                                           .data2 = 0xb2e6,
> > +                                           .data3 = 0x4523,
> > +                                           .data4 = { 0x93, 0xef, 0xc3, 
> > 0x09,
> > +                                                      0xe0, 0x00, 0xc7, 
> > 0x46} };
> > +
> > +
> > +static const ms_guid phys_sector_guid =  { .data1 = 0xcda348c7,
> > +                                           .data2 = 0x445d,
> > +                                           .data3 = 0x4471,
> > +                                           .data4 = { 0x9c, 0xc9, 0xe9, 
> > 0x88,
> > +                                                      0x52, 0x51, 0xc5, 
> > 0x56} };
> > +
> > +static const ms_guid parent_locator_guid = { .data1 = 0xa8d35f2d,
> > +                                             .data2 = 0xb30b,
> > +                                             .data3 = 0x454d,
> > +                                             .data4 = { 0xab, 0xf7, 0xd3,
> > +                                                        0xd8, 0x48, 0x34,
> > +                                                        0xab, 0x0c} };
> > +
> > +static const ms_guid logical_sector_guid = { .data1 = 0x8141bf1d,
> > +                                             .data2 = 0xa96f,
> > +                                             .data3 = 0x4709,
> > +                                             .data4 = { 0xba, 0x47, 0xf2,
> > +                                                        0x33, 0xa8, 0xfa,
> > +                                                        0xab, 0x5f} };
> > +
> > +/* Each parent type must have a valid GUID; this is for parent images
> > + * of type 'VHDX'.  If we were to allow e.g. a QCOW2 parent, we would
> > + * need to make up our own QCOW2 GUID type */
> > +static const ms_guid parent_vhdx_guid = { .data1 = 0xb04aefb7,
> > +                                          .data2 = 0xd19e,
> > +                                          .data3 = 0x4a81,
> > +                                          .data4 = { 0xb7, 0x89, 0x25, 
> > 0xb8,
> > +                                                     0xe9, 0x44, 0x59, 
> > 0x13} };
> > +
> > +
> > +#define META_FILE_PARAMETER_PRESENT      0x01
> > +#define META_VIRTUAL_DISK_SIZE_PRESENT   0x02
> > +#define META_PAGE_83_PRESENT             0x04
> > +#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
> > +#define META_PHYS_SECTOR_SIZE_PRESENT    0x10
> > +#define META_PARENT_LOCATOR_PRESENT      0x20
> > +
> > +#define META_ALL_PRESENT    \
> > +    (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \
> > +     META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
> > +     META_PHYS_SECTOR_SIZE_PRESENT)
> > +
> > +typedef struct vhdx_metadata_entries {
> > +    vhdx_metadata_table_entry file_parameters_entry;
> > +    vhdx_metadata_table_entry virtual_disk_size_entry;
> > +    vhdx_metadata_table_entry page83_data_entry;
> > +    vhdx_metadata_table_entry logical_sector_size_entry;
> > +    vhdx_metadata_table_entry phys_sector_size_entry;
> > +    vhdx_metadata_table_entry parent_locator_entry;
> > +    uint16_t present;
> > +} vhdx_metadata_entries;
> > +
> > +
> > +typedef struct BDRVVHDXState {
> > +    CoMutex lock;
> > +
> > +    int curr_header;
> > +    vhdx_header *headers[2];
> > +
> > +    vhdx_region_table_header rt;
> > +    vhdx_region_table_entry bat_rt;         /* region table for the BAT */
> > +    vhdx_region_table_entry metadata_rt;    /* region table for the 
> > metadata */
> > +
> > +    vhdx_metadata_table_header  metadata_hdr;
> > +    vhdx_metadata_entries metadata_entries;
> > +
> > +    vhdx_file_parameters params;
> > +    uint32_t block_size;
> > +    uint32_t block_size_bits;
> > +    uint32_t sectors_per_block;
> > +    uint32_t sectors_per_block_bits;
> > +
> > +    uint64_t virtual_disk_size;
> > +    uint32_t logical_sector_size;
> > +    uint32_t physical_sector_size;
> > +
> > +    uint64_t chunk_ratio;
> > +    uint32_t chunk_ratio_bits;
> > +    uint32_t logical_sector_size_bits;
> > +
> > +    uint32_t bat_entries;
> > +    vhdx_bat_entry *bat;
> > +    uint64_t bat_offset;
> > +
> > +    vhdx_parent_locator_header parent_header;
> > +    vhdx_parent_locator_entry *parent_entries;
> > +
> > +} BDRVVHDXState;
> > +
> > +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
> > +                            int crc_offset)
> > +{
> > +    uint32_t crc_new;
> > +    uint32_t crc_orig;
> > +    assert(buf != NULL);
> > +
> > +    if (crc_offset > 0) {
> > +        memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig));
> > +        memset(buf+crc_offset, 0, sizeof(crc_orig));
> > +    }
> > +
> > +    crc_new = crc32c(crc, buf, size);
> > +    if (crc_offset > 0) {
> > +        memcpy(buf+crc_offset, &crc_orig, sizeof(crc_orig));
> > +    }
> > +
> > +    return crc_new;
> > +}
> > +
> > +/* Validates the checksum of the buffer, with an in-place CRC.
> > + *
> > + * Zero is substituted during crc calculation for the original crc field,
> > + * and the crc field is restored afterwards.  But the buffer will be 
> > modifed
> > + * during the calculation, so this may not be not suitable for 
> > multi-threaded
> > + * use.
> > + *
> > + * crc_offset: byte offset in buf of the buffer crc
> > + * buf: buffer pointer
> > + * size: size of buffer (must be > crc_offset+4)
> > + *
> > + * returns true if checksum is valid, false otherwise
> > + */
> > +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
> > +{
> > +    uint32_t crc_orig;
> > +    uint32_t crc;
> > +
> > +    assert(buf != NULL);
> > +    assert(size > (crc_offset+4));
> > +
> > +    memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig));
> > +    crc_orig = le32_to_cpu(crc_orig);
> > +
> > +    crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset);
> > +
> > +    return crc == crc_orig;
> > +}
> > +
> > +
> > +/*
> > + * Per the MS VHDX Specification, for every VHDX file:
> > + *      - The header section is fixed size - 1 MB
> > + *      - The header section is always the first "object"
> > + *      - The first 64KB of the header is the File Identifier
> > + *      - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
> > + *      - The following 512 bytes constitute a UTF-16 string identifiying 
> > the
> > + *        software that created the file, and is optional and diagnostic 
> > only.
> > + *
> > + *  Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
> > + */
> > +static int vhdx_probe(const uint8_t *buf, int buf_size, const char 
> > *filename)
> > +{
> > +    if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {
> > +        return 100;
> > +    }
> > +    return 0;
> > +}
> > +
> > +/* All VHDX structures on disk are little endian */
> > +static void vhdx_header_le_import(vhdx_header *h)
> > +{
> > +    assert(h != NULL);
> > +
> > +    le32_to_cpus(&h->signature);
> > +    le32_to_cpus(&h->checksum);
> > +    le64_to_cpus(&h->sequence_number);
> > +
> > +    leguid_to_cpus(&h->file_write_guid);
> > +    leguid_to_cpus(&h->data_write_guid);
> > +    leguid_to_cpus(&h->log_guid);
> > +
> > +    le16_to_cpus(&h->log_version);
> > +    le16_to_cpus(&h->version);
> > +    le32_to_cpus(&h->log_length);
> > +    le64_to_cpus(&h->log_offset);
> > +}
> > +
> > +
> > +/* opens the specified header block from the VHDX file header section */
> > +static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
> > +{
> > +    int ret = 0;
> > +    vhdx_header *header1;
> > +    vhdx_header *header2;
> > +    uint64_t h1_seq = 0;
> > +    uint64_t h2_seq = 0;
> > +    uint8_t *buffer;
> 
> Makes sense to use uint8_t* (or possible rather void*) here, but it
> means that struct vhdx_header_padded is unused and could be dropped from
> the header file.
>

OK, I'll drop it from the header file (or maybe convert the meaning of
it into a comment in the header, to preserve the info).

> > +    header1 = qemu_blockalign(bs, sizeof(vhdx_header));
> > +    header2 = qemu_blockalign(bs, sizeof(vhdx_header));
> > +
> > +    buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
> > +
> > +    s->headers[0] = header1;
> > +    s->headers[1] = header2;
> 
> The spec requires that the signature field of the File Type Identifier
> must be validated when loading a VHDX file. We only have the check in
> vhdx_probe(), but not in vhdx_open().
> 

Yes, true - I'll add that in first.

> (This means that our struct vhdx_file_identifier is unused.)
> 
> > +    /* We have to read the whole VHDX_HEADER_SIZE instead of
> > +     * sizeof(vhdx_header), because the checksum is over the whole
> > +     * region */
> > +    ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, 
> > VHDX_HEADER_SIZE);
> > +    if (ret < 0) {
> > +        goto fail;
> > +    }
> > +    /* copy over just the relevant portion that we need */
> > +    memcpy(header1, buffer, sizeof(vhdx_header));
> > +    vhdx_header_le_import(header1);
> > +
> > +    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
> > +        header1->signature == VHDX_HDR_MAGIC) {
> > +        h1_seq = header1->sequence_number;
> > +    }
> > +
> > +    ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, 
> > VHDX_HEADER_SIZE);
> > +    if (ret < 0) {
> > +        goto fail;
> > +    }
> > +    /* copy over just the relevant portion that we need */
> > +    memcpy(header2, buffer, sizeof(vhdx_header));
> > +    vhdx_header_le_import(header2);
> > +
> > +    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
> > +        header2->signature == VHDX_HDR_MAGIC) {
> > +        h2_seq = header2->sequence_number;
> > +    }
> > +
> > +    if (h1_seq > h2_seq) {
> > +        s->curr_header = 0;
> > +    } else if (h2_seq > h1_seq) {
> > +        s->curr_header = 1;
> > +    } else {
> > +        printf("NO VALID HEADER\n");
> > +        ret = -EINVAL;
> > +        goto fail;
> > +    }
> 
> This may be nit-picking, but according to the spec, I think a header
> with a sequence number of 0 can be valid (if the other header is
> invalid). This code would get 0 for both and error out.
> 

I think you are right.  The other header would need to have either an
invalid signature or checksum, but if that was the case technically 0
could indicate the active header.

> What does a new VHDX file look like in practice? Sequence numbers 1
> and 2?
> 

I'll check that out - my Hyper-V test server is at home, and I am
working at a co-working place today.  I'll check it this evening -
it'll have to be on a freshly created image before even opening it up
with Hyper-V.

> > +
> > +    ret = 0;
> > +
> > +    goto exit;
> > +
> > +fail:
> > +    qemu_vfree(header1);
> > +    qemu_vfree(header2);
> > +    s->headers[0] = NULL;
> > +    s->headers[1] = NULL;
> > +exit:
> > +    qemu_vfree(buffer);
> > +    return ret;
> > +}
> > +
> > +
> > +static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
> > +{
> > +    int ret = 0;
> > +    uint8_t *buffer;
> > +    int offset = 0;
> > +    vhdx_region_table_entry rt_entry;
> > +    int i;
> > +
> > +    /* We have to read the whole 64KB block, because the crc32 is over the
> > +     * whole block */
> > +    buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
> > +
> > +    ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
> > +                    VHDX_HEADER_BLOCK_SIZE);
> > +    if (ret < 0) {
> > +        goto fail;
> > +    }
> > +    memcpy(&s->rt, buffer, sizeof(s->rt));
> > +    le32_to_cpus(&s->rt.signature);
> > +    le32_to_cpus(&s->rt.checksum);
> > +    le32_to_cpus(&s->rt.entry_count);
> > +    le32_to_cpus(&s->rt.reserved);
> > +    offset += sizeof(s->rt);
> 
> Maybe it would safer with respect to forgotting endianness conversion if
> we didn't copy and then convert fields, but only copy converted values.
> Like:
> 
> vhdx_region_table_header *rt = buffer;
> s->rt = (vhdx_region_table_header) {
>     .signature  = le32_to_cpu(rt->signature),
>     .checksum   = le32_to_cpu(rt->checksum),
>     ...
> };
> 
> Matter of taste, I guess, so this is just a suggestion, not a request.
> You could do this is quite a few places, I think.
> 
> > +
> > +    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
> > +        s->rt.signature != VHDX_RT_MAGIC) {
> > +        ret = -EINVAL;
> > +        goto fail;
> > +    }
> > +
> > +    for (i = 0; i < s->rt.entry_count; i++) {
> > +        memcpy(&rt_entry, buffer+offset, sizeof(rt_entry));
> > +        offset += sizeof(rt_entry);
> > +
> > +        leguid_to_cpus(&rt_entry.guid);
> > +        le64_to_cpus(&rt_entry.file_offset);
> > +        le32_to_cpus(&rt_entry.length);
> > +        le32_to_cpus(&rt_entry.data_bits);
> > +
> > +        /* see if we recognize the entry */
> > +        if (guid_eq(rt_entry.guid, bat_guid)) {
> > +            s->bat_rt = rt_entry;
> > +            continue;
> > +        }
> > +
> > +        if (guid_eq(rt_entry.guid, metadata_guid)) {
> > +            s->metadata_rt = rt_entry;
> > +            continue;
> > +        }
> 
> If the same regions occurs multiple times, the latest wins. Such images
> aren't valid, but what should we do with such cases? Is this good enough
> or should we detect it?

I think such images are more undefined rather than explicitly invalid.
I don't think the spec touches on the idea of multiple regions of the
same type.  That said, I don't what to make of an image file with
multiple BAT regions, for instance - I think error is the only sane
option.

Maybe keep a list of all entries, and if there are duplicates error
out with -EINVAL?  

> 
> > +        if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
> > +            /* cannot read vhdx file - required region table entry that
> > +             * we do not understand.  per spec, we must fail to open */
> > +            ret = -ENOTSUP;
> > +            goto fail;
> > +        }
> > +    }
> > +    ret = 0;
> > +
> > +fail:
> > +    qemu_vfree(buffer);
> > +    return ret;
> > +}
> > +
> > +
> > +
> > +/* Metadata initial parser
> > + *
> > + * This loads all the metadata entry fields.  This may cause additional
> > + * fields to be processed (e.g. parent locator, etc..).
> > + *
> > + * There are 5 Metadata items that are always required:
> > + *      - File Parameters (block size, has a parent)
> > + *      - Virtual Disk Size (size, in bytes, of the virtual drive)
> > + *      - Page 83 Data (scsi page 83 guid)
> > + *      - Logical Sector Size (logical sector size in bytes, either 512 or
> > + *                             4096.  We only support 512 currently)
> > + *      - Physical Sector Size (512 or 4096)
> > + *
> > + * Also, if the File Parameters indicate this is a differencing file,
> > + * we must also look for the Parent Locator metadata item.
> > + */
> > +static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
> > +{
> > +    int ret = 0;
> > +    uint8_t *buffer;
> > +    int offset = 0;
> > +    int i = 0;
> > +    uint32_t block_size, sectors_per_block, logical_sector_size;
> > +    uint64_t chunk_ratio;
> > +    vhdx_metadata_table_entry md_entry;
> > +
> > +    buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
> > +
> > +    ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
> > +                     VHDX_METADATA_TABLE_MAX_SIZE);
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +    memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
> > +    offset += sizeof(s->metadata_hdr);
> > +
> > +    le64_to_cpus(&s->metadata_hdr.signature);
> > +    le16_to_cpus(&s->metadata_hdr.reserved);
> > +    le16_to_cpus(&s->metadata_hdr.entry_count);
> > +
> > +    if (s->metadata_hdr.signature != VHDX_METADATA_MAGIC) {
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    s->metadata_entries.present = 0;
> > +
> > +    if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
> > +        (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    for (i = 0; i < s->metadata_hdr.entry_count; i++) {
> > +        memcpy(&md_entry, buffer+offset, sizeof(md_entry));
> > +        offset += sizeof(md_entry);
> > +
> > +        leguid_to_cpus(&md_entry.item_id);
> > +        le32_to_cpus(&md_entry.offset);
> > +        le32_to_cpus(&md_entry.length);
> > +        le32_to_cpus(&md_entry.data_bits);
> > +        le32_to_cpus(&md_entry.reserved2);
> > +
> > +        if (guid_eq(md_entry.item_id, file_param_guid)) {
> > +            s->metadata_entries.file_parameters_entry = md_entry;
> > +            s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
> > +            continue;
> > +        }
> > +
> > +        if (guid_eq(md_entry.item_id, virtual_size_guid)) {
> > +            s->metadata_entries.virtual_disk_size_entry = md_entry;
> > +            s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
> > +            continue;
> > +        }
> > +
> > +        if (guid_eq(md_entry.item_id, page83_guid)) {
> > +            s->metadata_entries.page83_data_entry = md_entry;
> > +            s->metadata_entries.present |= META_PAGE_83_PRESENT;
> > +            continue;
> > +        }
> > +
> > +        if (guid_eq(md_entry.item_id, logical_sector_guid)) {
> > +            s->metadata_entries.logical_sector_size_entry = md_entry;
> > +            s->metadata_entries.present |= 
> > META_LOGICAL_SECTOR_SIZE_PRESENT;
> > +            continue;
> > +        }
> > +
> > +        if (guid_eq(md_entry.item_id, phys_sector_guid)) {
> > +            s->metadata_entries.phys_sector_size_entry = md_entry;
> > +            s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
> > +            continue;
> > +        }
> > +
> > +        if (guid_eq(md_entry.item_id, parent_locator_guid)) {
> > +            s->metadata_entries.parent_locator_entry = md_entry;
> > +            s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
> > +            continue;
> > +        }
> > +
> > +        if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
> > +            /* cannot read vhdx file - required region table entry that
> > +             * we do not understand.  per spec, we must fail to open */
> > +            ret = -ENOTSUP;
> > +            goto exit;
> > +        }
> > +    }
> > +
> > +    if (s->metadata_entries.present != META_ALL_PRESENT) {
> > +        ret = -ENOTSUP;
> > +        goto exit;
> > +    }
> > +
> > +    ret = bdrv_pread(bs->file,
> > +                     s->metadata_entries.file_parameters_entry.offset
> > +                                         + s->metadata_rt.file_offset,
> > +                     &s->params,
> > +                     sizeof(s->params));
> > +
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +
> > +    le32_to_cpus(&s->params.block_size);
> > +    le32_to_cpus(&s->params.data_bits);
> > +
> > +
> > +    /* We now have the file parameters, so we can tell if this is a
> > +     * differencing file (i.e.. has_parent), is dynamic or fixed
> > +     * sized (leave_blocks_allocated), and the block size */
> > +
> > +    /* The parent locator required iff the file parameters has_parent set 
> > */
> > +    if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
> > +        if (s->metadata_entries.present & ~META_PARENT_LOCATOR_PRESENT) {
> 
> Not sure what you're trying to achieve here. We get -ENOTSUP if any
> metadata entry except the parent locator is present, and -EINVAL if
> there is none?
>

That is a mistake - that check should be sans tilde:
  if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) 


There are currently 2 failure cases (while parent locators are
unsupported):

    1.) There is a parent present, and a parent locator field.
            - This is proper per the spec.  Normally, we would parse
              the parent locator field.  But this is not supported, so
              we return -ENOTSUP.

    2.) There is a parent present, but not parent locator field.
            - This is invalid, because if the parent is present the
              parent locator field per spec is a required field.  So,
              return -EINVAL to indicate invalid field.


> Of course, this doesn't matter currently, because above we've verified
> that s->metadata_entries.present == META_ALL_PRESENT, so we'll always
> get the -ENOTSUP case.
> 

Once we support parent locator fields, that check should be masked
against META_ALL_PRESENT, so that we are only checking for the base
required fields.  It wouldn't hurt to got ahead and do it now.

> > +            /* TODO: parse  parent locator fields */
> > +            ret = -ENOTSUP; /* temp, until differencing files are 
> > supported */
> > +            goto exit;
> > +        } else {
> > +            /* if has_parent is set, but there is not parent locator 
> > present,
> > +             * then that is an invalid combination */
> > +            ret = -EINVAL;
> > +            goto exit;
> > +        }
> > +    }
> > +
> > +    /* determine virtual disk size, logical sector size,
> > +     * and phys sector size */
> > +
> > +    ret = bdrv_pread(bs->file,
> > +                     s->metadata_entries.virtual_disk_size_entry.offset
> > +                                           + s->metadata_rt.file_offset,
> > +                     &s->virtual_disk_size,
> > +                     sizeof(uint64_t));
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +    ret = bdrv_pread(bs->file,
> > +                     s->metadata_entries.logical_sector_size_entry.offset
> > +                                             + s->metadata_rt.file_offset,
> > +                     &s->logical_sector_size,
> > +                     sizeof(uint32_t));
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +    ret = bdrv_pread(bs->file,
> > +                     s->metadata_entries.phys_sector_size_entry.offset
> > +                                          + s->metadata_rt.file_offset,
> > +                     &s->physical_sector_size,
> > +                     sizeof(uint32_t));
> > +    if (ret < 0) {
> > +        goto exit;
> > +    }
> > +
> > +    le64_to_cpus(&s->virtual_disk_size);
> > +    le32_to_cpus(&s->logical_sector_size);
> > +    le32_to_cpus(&s->physical_sector_size);
> > +
> > +    if (s->logical_sector_size == 0 || s->params.block_size == 0) {
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    /* both block_size and sector_size are guaranteed powers of 2 */
> > +    s->sectors_per_block = s->params.block_size / s->logical_sector_size;
> > +    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
> > +                     (uint64_t)s->logical_sector_size /
> > +                     (uint64_t)s->params.block_size;
> > +
> > +    /* These values are ones we will want to use for division / 
> > multiplication
> > +     * later on, and they are all guaranteed (per the spec) to be powers 
> > of 2,
> > +     * so we can take advantage of that for shift operations during
> > +     * reads/writes */
> > +    logical_sector_size = s->logical_sector_size;
> > +    if (logical_sector_size & (logical_sector_size - 1)) {
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    sectors_per_block = s->sectors_per_block;
> > +    if (sectors_per_block & (sectors_per_block - 1)) {
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +    chunk_ratio = s->chunk_ratio;
> > +    if (chunk_ratio & (chunk_ratio - 1)) {
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +    block_size = s->params.block_size;
> > +    if (block_size & (block_size - 1)) {
> > +        ret = -EINVAL;
> > +        goto exit;
> > +    }
> > +
> > +    while (logical_sector_size >>= 1) {
> > +        s->logical_sector_size_bits++;
> > +    }
> > +    while (sectors_per_block >>= 1) {
> > +        s->sectors_per_block_bits++;
> > +    }
> > +    while (chunk_ratio >>= 1) {
> > +        s->chunk_ratio_bits++;
> > +    }
> > +    while (block_size >>= 1) {
> > +        s->block_size_bits++;
> > +    }
> > +
> > +    if (s->logical_sector_size != BDRV_SECTOR_SIZE) {
> > +        printf("VHDX error - QEMU only supports 512 byte sector sizes\n");
> 
> This is not true. It depends on the device model, and in particular the
> qdev properties of it. The block layer doesn't have access to them, so
> checking that it matches the image file isn't trivial. It would have to
> be the device model that checks that the BlockDriverState is valid for
> the device it emulates.
> 
> I suggest dropping this check; otherwise make it (q)error_report at
> least.
> 

OK.

> > +        ret = -ENOTSUP;
> > +        goto exit;
> > +    }
> > +
> > +    ret = 0;
> > +
> > +exit:
> > +    qemu_vfree(buffer);
> > +    return ret;
> > +}
> 
> Kevin



reply via email to

[Prev in Thread] Current Thread [Next in Thread]