[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v2 3/5] block: initial VHDX driver support frame
From: |
Jeff Cody |
Subject: |
Re: [Qemu-devel] [PATCH v2 3/5] block: initial VHDX driver support framework - supports open and probe |
Date: |
Thu, 25 Apr 2013 11:03:38 -0400 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Thu, Apr 25, 2013 at 03:04:23PM +0200, Kevin Wolf wrote:
> Am 23.04.2013 um 16:24 hat Jeff Cody geschrieben:
> > This is the initial block driver framework for VHDX image support (
> > i.e. Hyper-V image file formats), that supports opening VHDX files, and
> > parsing the headers.
> >
> > This commit does not yet enable:
> > - reading
> > - writing
> > - updating the header
> > - differencing files (images with parents)
> > - log replay / dirty logs (only clean images)
> >
> > This is based on Microsoft's VHDX specification:
> > "VHDX Format Specification v0.95", published 4/12/2012
> > https://www.microsoft.com/en-us/download/details.aspx?id=29681
> >
> > Signed-off-by: Jeff Cody <address@hidden>
>
> Okay, now I'm starting to actually compare the spec to your
> implementation. Comments inline.
>
> > block/Makefile.objs | 1 +
> > block/vhdx.c | 769
> > ++++++++++++++++++++++++++++++++++++++++++++++++++++
> > block/vhdx.h | 25 ++
> > 3 files changed, 795 insertions(+)
> > create mode 100644 block/vhdx.c
> >
> > diff --git a/block/Makefile.objs b/block/Makefile.objs
> > index 6c4b5bc..5f0358a 100644
> > --- a/block/Makefile.objs
> > +++ b/block/Makefile.objs
> > @@ -2,6 +2,7 @@ block-obj-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o
> > dmg.o bochs.o vpc.o vvfat
> > block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
> > qcow2-cache.o
> > block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
> > block-obj-y += qed-check.o
> > +block-obj-y += vhdx.o
> > block-obj-y += parallels.o blkdebug.o blkverify.o
> > block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
> > block-obj-$(CONFIG_POSIX) += raw-posix.o
> > diff --git a/block/vhdx.c b/block/vhdx.c
> > new file mode 100644
> > index 0000000..b0ea2ba
> > --- /dev/null
> > +++ b/block/vhdx.c
> > @@ -0,0 +1,769 @@
> > +/*
> > + * Block driver for Hyper-V VHDX Images
> > + *
> > + * Copyright (c) 2013 Red Hat, Inc.,
> > + *
> > + * Authors:
> > + * Jeff Cody <address@hidden>
> > + *
> > + * This is based on the "VHDX Format Specification v0.95", published
> > 4/12/2012
> > + * by Microsoft:
> > + * https://www.microsoft.com/en-us/download/details.aspx?id=29681
> > + *
> > + * This work is licensed under the terms of the GNU LGPL, version 2 or
> > later.
> > + * See the COPYING.LIB file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu-common.h"
> > +#include "block/block_int.h"
> > +#include "qemu/module.h"
> > +#include "qemu/crc32c.h"
> > +#include "block/vhdx.h"
> > +
> > +
> > +/* Several metadata and region table data entries are identified by
> > + * guids in a MS-specific GUID format. */
> > +
> > +
> > +/* ------- Known Region Table GUIDs ---------------------- */
> > +static const ms_guid bat_guid = { .data1 = 0x2dc27766,
> > + .data2 = 0xf623,
> > + .data3 = 0x4200,
> > + .data4 = { 0x9d, 0x64, 0x11, 0x5e,
> > + 0x9b, 0xfd, 0x4a, 0x08}
> > };
> > +
> > +static const ms_guid metadata_guid = { .data1 = 0x8b7ca206,
> > + .data2 = 0x4790,
> > + .data3 = 0x4b9a,
> > + .data4 = { 0xb8, 0xfe, 0x57, 0x5f,
> > + 0x05, 0x0f, 0x88, 0x6e}
> > };
> > +
> > +
> > +
> > +/* ------- Known Metadata Entry GUIDs ---------------------- */
> > +static const ms_guid file_param_guid = { .data1 = 0xcaa16737,
> > + .data2 = 0xfa36,
> > + .data3 = 0x4d43,
> > + .data4 = { 0xb3, 0xb6, 0x33,
> > 0xf0,
> > + 0xaa, 0x44, 0xe7,
> > 0x6b} };
> > +
> > +static const ms_guid virtual_size_guid = { .data1 = 0x2FA54224,
> > + .data2 = 0xcd1b,
> > + .data3 = 0x4876,
> > + .data4 = { 0xb2, 0x11, 0x5d,
> > 0xbe,
> > + 0xd8, 0x3b, 0xf4,
> > 0xb8} };
> > +
> > +static const ms_guid page83_guid = { .data1 = 0xbeca12ab,
> > + .data2 = 0xb2e6,
> > + .data3 = 0x4523,
> > + .data4 = { 0x93, 0xef, 0xc3,
> > 0x09,
> > + 0xe0, 0x00, 0xc7,
> > 0x46} };
> > +
> > +
> > +static const ms_guid phys_sector_guid = { .data1 = 0xcda348c7,
> > + .data2 = 0x445d,
> > + .data3 = 0x4471,
> > + .data4 = { 0x9c, 0xc9, 0xe9,
> > 0x88,
> > + 0x52, 0x51, 0xc5,
> > 0x56} };
> > +
> > +static const ms_guid parent_locator_guid = { .data1 = 0xa8d35f2d,
> > + .data2 = 0xb30b,
> > + .data3 = 0x454d,
> > + .data4 = { 0xab, 0xf7, 0xd3,
> > + 0xd8, 0x48, 0x34,
> > + 0xab, 0x0c} };
> > +
> > +static const ms_guid logical_sector_guid = { .data1 = 0x8141bf1d,
> > + .data2 = 0xa96f,
> > + .data3 = 0x4709,
> > + .data4 = { 0xba, 0x47, 0xf2,
> > + 0x33, 0xa8, 0xfa,
> > + 0xab, 0x5f} };
> > +
> > +/* Each parent type must have a valid GUID; this is for parent images
> > + * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would
> > + * need to make up our own QCOW2 GUID type */
> > +static const ms_guid parent_vhdx_guid = { .data1 = 0xb04aefb7,
> > + .data2 = 0xd19e,
> > + .data3 = 0x4a81,
> > + .data4 = { 0xb7, 0x89, 0x25,
> > 0xb8,
> > + 0xe9, 0x44, 0x59,
> > 0x13} };
> > +
> > +
> > +#define META_FILE_PARAMETER_PRESENT 0x01
> > +#define META_VIRTUAL_DISK_SIZE_PRESENT 0x02
> > +#define META_PAGE_83_PRESENT 0x04
> > +#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
> > +#define META_PHYS_SECTOR_SIZE_PRESENT 0x10
> > +#define META_PARENT_LOCATOR_PRESENT 0x20
> > +
> > +#define META_ALL_PRESENT \
> > + (META_FILE_PARAMETER_PRESENT | META_VIRTUAL_DISK_SIZE_PRESENT | \
> > + META_PAGE_83_PRESENT | META_LOGICAL_SECTOR_SIZE_PRESENT | \
> > + META_PHYS_SECTOR_SIZE_PRESENT)
> > +
> > +typedef struct vhdx_metadata_entries {
> > + vhdx_metadata_table_entry file_parameters_entry;
> > + vhdx_metadata_table_entry virtual_disk_size_entry;
> > + vhdx_metadata_table_entry page83_data_entry;
> > + vhdx_metadata_table_entry logical_sector_size_entry;
> > + vhdx_metadata_table_entry phys_sector_size_entry;
> > + vhdx_metadata_table_entry parent_locator_entry;
> > + uint16_t present;
> > +} vhdx_metadata_entries;
> > +
> > +
> > +typedef struct BDRVVHDXState {
> > + CoMutex lock;
> > +
> > + int curr_header;
> > + vhdx_header *headers[2];
> > +
> > + vhdx_region_table_header rt;
> > + vhdx_region_table_entry bat_rt; /* region table for the BAT */
> > + vhdx_region_table_entry metadata_rt; /* region table for the
> > metadata */
> > +
> > + vhdx_metadata_table_header metadata_hdr;
> > + vhdx_metadata_entries metadata_entries;
> > +
> > + vhdx_file_parameters params;
> > + uint32_t block_size;
> > + uint32_t block_size_bits;
> > + uint32_t sectors_per_block;
> > + uint32_t sectors_per_block_bits;
> > +
> > + uint64_t virtual_disk_size;
> > + uint32_t logical_sector_size;
> > + uint32_t physical_sector_size;
> > +
> > + uint64_t chunk_ratio;
> > + uint32_t chunk_ratio_bits;
> > + uint32_t logical_sector_size_bits;
> > +
> > + uint32_t bat_entries;
> > + vhdx_bat_entry *bat;
> > + uint64_t bat_offset;
> > +
> > + vhdx_parent_locator_header parent_header;
> > + vhdx_parent_locator_entry *parent_entries;
> > +
> > +} BDRVVHDXState;
> > +
> > +uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
> > + int crc_offset)
> > +{
> > + uint32_t crc_new;
> > + uint32_t crc_orig;
> > + assert(buf != NULL);
> > +
> > + if (crc_offset > 0) {
> > + memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig));
> > + memset(buf+crc_offset, 0, sizeof(crc_orig));
> > + }
> > +
> > + crc_new = crc32c(crc, buf, size);
> > + if (crc_offset > 0) {
> > + memcpy(buf+crc_offset, &crc_orig, sizeof(crc_orig));
> > + }
> > +
> > + return crc_new;
> > +}
> > +
> > +/* Validates the checksum of the buffer, with an in-place CRC.
> > + *
> > + * Zero is substituted during crc calculation for the original crc field,
> > + * and the crc field is restored afterwards. But the buffer will be
> > modifed
> > + * during the calculation, so this may not be not suitable for
> > multi-threaded
> > + * use.
> > + *
> > + * crc_offset: byte offset in buf of the buffer crc
> > + * buf: buffer pointer
> > + * size: size of buffer (must be > crc_offset+4)
> > + *
> > + * returns true if checksum is valid, false otherwise
> > + */
> > +bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
> > +{
> > + uint32_t crc_orig;
> > + uint32_t crc;
> > +
> > + assert(buf != NULL);
> > + assert(size > (crc_offset+4));
> > +
> > + memcpy(&crc_orig, buf+crc_offset, sizeof(crc_orig));
> > + crc_orig = le32_to_cpu(crc_orig);
> > +
> > + crc = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset);
> > +
> > + return crc == crc_orig;
> > +}
> > +
> > +
> > +/*
> > + * Per the MS VHDX Specification, for every VHDX file:
> > + * - The header section is fixed size - 1 MB
> > + * - The header section is always the first "object"
> > + * - The first 64KB of the header is the File Identifier
> > + * - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
> > + * - The following 512 bytes constitute a UTF-16 string identifiying
> > the
> > + * software that created the file, and is optional and diagnostic
> > only.
> > + *
> > + * Therefore, we probe by looking for the vhdxfile signature "vhdxfile"
> > + */
> > +static int vhdx_probe(const uint8_t *buf, int buf_size, const char
> > *filename)
> > +{
> > + if (buf_size >= 8 && !memcmp(buf, "vhdxfile", 8)) {
> > + return 100;
> > + }
> > + return 0;
> > +}
> > +
> > +/* All VHDX structures on disk are little endian */
> > +static void vhdx_header_le_import(vhdx_header *h)
> > +{
> > + assert(h != NULL);
> > +
> > + le32_to_cpus(&h->signature);
> > + le32_to_cpus(&h->checksum);
> > + le64_to_cpus(&h->sequence_number);
> > +
> > + leguid_to_cpus(&h->file_write_guid);
> > + leguid_to_cpus(&h->data_write_guid);
> > + leguid_to_cpus(&h->log_guid);
> > +
> > + le16_to_cpus(&h->log_version);
> > + le16_to_cpus(&h->version);
> > + le32_to_cpus(&h->log_length);
> > + le64_to_cpus(&h->log_offset);
> > +}
> > +
> > +
> > +/* opens the specified header block from the VHDX file header section */
> > +static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
> > +{
> > + int ret = 0;
> > + vhdx_header *header1;
> > + vhdx_header *header2;
> > + uint64_t h1_seq = 0;
> > + uint64_t h2_seq = 0;
> > + uint8_t *buffer;
>
> Makes sense to use uint8_t* (or possible rather void*) here, but it
> means that struct vhdx_header_padded is unused and could be dropped from
> the header file.
>
OK, I'll drop it from the header file (or maybe convert the meaning of
it into a comment in the header, to preserve the info).
> > + header1 = qemu_blockalign(bs, sizeof(vhdx_header));
> > + header2 = qemu_blockalign(bs, sizeof(vhdx_header));
> > +
> > + buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
> > +
> > + s->headers[0] = header1;
> > + s->headers[1] = header2;
>
> The spec requires that the signature field of the File Type Identifier
> must be validated when loading a VHDX file. We only have the check in
> vhdx_probe(), but not in vhdx_open().
>
Yes, true - I'll add that in first.
> (This means that our struct vhdx_file_identifier is unused.)
>
> > + /* We have to read the whole VHDX_HEADER_SIZE instead of
> > + * sizeof(vhdx_header), because the checksum is over the whole
> > + * region */
> > + ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer,
> > VHDX_HEADER_SIZE);
> > + if (ret < 0) {
> > + goto fail;
> > + }
> > + /* copy over just the relevant portion that we need */
> > + memcpy(header1, buffer, sizeof(vhdx_header));
> > + vhdx_header_le_import(header1);
> > +
> > + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
> > + header1->signature == VHDX_HDR_MAGIC) {
> > + h1_seq = header1->sequence_number;
> > + }
> > +
> > + ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer,
> > VHDX_HEADER_SIZE);
> > + if (ret < 0) {
> > + goto fail;
> > + }
> > + /* copy over just the relevant portion that we need */
> > + memcpy(header2, buffer, sizeof(vhdx_header));
> > + vhdx_header_le_import(header2);
> > +
> > + if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
> > + header2->signature == VHDX_HDR_MAGIC) {
> > + h2_seq = header2->sequence_number;
> > + }
> > +
> > + if (h1_seq > h2_seq) {
> > + s->curr_header = 0;
> > + } else if (h2_seq > h1_seq) {
> > + s->curr_header = 1;
> > + } else {
> > + printf("NO VALID HEADER\n");
> > + ret = -EINVAL;
> > + goto fail;
> > + }
>
> This may be nit-picking, but according to the spec, I think a header
> with a sequence number of 0 can be valid (if the other header is
> invalid). This code would get 0 for both and error out.
>
I think you are right. The other header would need to have either an
invalid signature or checksum, but if that was the case technically 0
could indicate the active header.
> What does a new VHDX file look like in practice? Sequence numbers 1
> and 2?
>
I'll check that out - my Hyper-V test server is at home, and I am
working at a co-working place today. I'll check it this evening -
it'll have to be on a freshly created image before even opening it up
with Hyper-V.
> > +
> > + ret = 0;
> > +
> > + goto exit;
> > +
> > +fail:
> > + qemu_vfree(header1);
> > + qemu_vfree(header2);
> > + s->headers[0] = NULL;
> > + s->headers[1] = NULL;
> > +exit:
> > + qemu_vfree(buffer);
> > + return ret;
> > +}
> > +
> > +
> > +static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
> > +{
> > + int ret = 0;
> > + uint8_t *buffer;
> > + int offset = 0;
> > + vhdx_region_table_entry rt_entry;
> > + int i;
> > +
> > + /* We have to read the whole 64KB block, because the crc32 is over the
> > + * whole block */
> > + buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
> > +
> > + ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
> > + VHDX_HEADER_BLOCK_SIZE);
> > + if (ret < 0) {
> > + goto fail;
> > + }
> > + memcpy(&s->rt, buffer, sizeof(s->rt));
> > + le32_to_cpus(&s->rt.signature);
> > + le32_to_cpus(&s->rt.checksum);
> > + le32_to_cpus(&s->rt.entry_count);
> > + le32_to_cpus(&s->rt.reserved);
> > + offset += sizeof(s->rt);
>
> Maybe it would safer with respect to forgotting endianness conversion if
> we didn't copy and then convert fields, but only copy converted values.
> Like:
>
> vhdx_region_table_header *rt = buffer;
> s->rt = (vhdx_region_table_header) {
> .signature = le32_to_cpu(rt->signature),
> .checksum = le32_to_cpu(rt->checksum),
> ...
> };
>
> Matter of taste, I guess, so this is just a suggestion, not a request.
> You could do this is quite a few places, I think.
>
> > +
> > + if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
> > + s->rt.signature != VHDX_RT_MAGIC) {
> > + ret = -EINVAL;
> > + goto fail;
> > + }
> > +
> > + for (i = 0; i < s->rt.entry_count; i++) {
> > + memcpy(&rt_entry, buffer+offset, sizeof(rt_entry));
> > + offset += sizeof(rt_entry);
> > +
> > + leguid_to_cpus(&rt_entry.guid);
> > + le64_to_cpus(&rt_entry.file_offset);
> > + le32_to_cpus(&rt_entry.length);
> > + le32_to_cpus(&rt_entry.data_bits);
> > +
> > + /* see if we recognize the entry */
> > + if (guid_eq(rt_entry.guid, bat_guid)) {
> > + s->bat_rt = rt_entry;
> > + continue;
> > + }
> > +
> > + if (guid_eq(rt_entry.guid, metadata_guid)) {
> > + s->metadata_rt = rt_entry;
> > + continue;
> > + }
>
> If the same regions occurs multiple times, the latest wins. Such images
> aren't valid, but what should we do with such cases? Is this good enough
> or should we detect it?
I think such images are more undefined rather than explicitly invalid.
I don't think the spec touches on the idea of multiple regions of the
same type. That said, I don't know what to make of an image file with
multiple BAT regions, for instance - I think error is the only sane
option.
Maybe keep a list of all entries, and if there are duplicates error
out with -EINVAL?
>
> > + if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
> > + /* cannot read vhdx file - required region table entry that
> > + * we do not understand. per spec, we must fail to open */
> > + ret = -ENOTSUP;
> > + goto fail;
> > + }
> > + }
> > + ret = 0;
> > +
> > +fail:
> > + qemu_vfree(buffer);
> > + return ret;
> > +}
> > +
> > +
> > +
> > +/* Metadata initial parser
> > + *
> > + * This loads all the metadata entry fields. This may cause additional
> > + * fields to be processed (e.g. parent locator, etc..).
> > + *
> > + * There are 5 Metadata items that are always required:
> > + * - File Parameters (block size, has a parent)
> > + * - Virtual Disk Size (size, in bytes, of the virtual drive)
> > + * - Page 83 Data (scsi page 83 guid)
> > + * - Logical Sector Size (logical sector size in bytes, either 512 or
> > + * 4096. We only support 512 currently)
> > + * - Physical Sector Size (512 or 4096)
> > + *
> > + * Also, if the File Parameters indicate this is a differencing file,
> > + * we must also look for the Parent Locator metadata item.
> > + */
> > +static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
> > +{
> > + int ret = 0;
> > + uint8_t *buffer;
> > + int offset = 0;
> > + int i = 0;
> > + uint32_t block_size, sectors_per_block, logical_sector_size;
> > + uint64_t chunk_ratio;
> > + vhdx_metadata_table_entry md_entry;
> > +
> > + buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
> > +
> > + ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
> > + VHDX_METADATA_TABLE_MAX_SIZE);
> > + if (ret < 0) {
> > + goto exit;
> > + }
> > + memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
> > + offset += sizeof(s->metadata_hdr);
> > +
> > + le64_to_cpus(&s->metadata_hdr.signature);
> > + le16_to_cpus(&s->metadata_hdr.reserved);
> > + le16_to_cpus(&s->metadata_hdr.entry_count);
> > +
> > + if (s->metadata_hdr.signature != VHDX_METADATA_MAGIC) {
> > + ret = -EINVAL;
> > + goto exit;
> > + }
> > +
> > + s->metadata_entries.present = 0;
> > +
> > + if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
> > + (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
> > + ret = -EINVAL;
> > + goto exit;
> > + }
> > +
> > + for (i = 0; i < s->metadata_hdr.entry_count; i++) {
> > + memcpy(&md_entry, buffer+offset, sizeof(md_entry));
> > + offset += sizeof(md_entry);
> > +
> > + leguid_to_cpus(&md_entry.item_id);
> > + le32_to_cpus(&md_entry.offset);
> > + le32_to_cpus(&md_entry.length);
> > + le32_to_cpus(&md_entry.data_bits);
> > + le32_to_cpus(&md_entry.reserved2);
> > +
> > + if (guid_eq(md_entry.item_id, file_param_guid)) {
> > + s->metadata_entries.file_parameters_entry = md_entry;
> > + s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
> > + continue;
> > + }
> > +
> > + if (guid_eq(md_entry.item_id, virtual_size_guid)) {
> > + s->metadata_entries.virtual_disk_size_entry = md_entry;
> > + s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
> > + continue;
> > + }
> > +
> > + if (guid_eq(md_entry.item_id, page83_guid)) {
> > + s->metadata_entries.page83_data_entry = md_entry;
> > + s->metadata_entries.present |= META_PAGE_83_PRESENT;
> > + continue;
> > + }
> > +
> > + if (guid_eq(md_entry.item_id, logical_sector_guid)) {
> > + s->metadata_entries.logical_sector_size_entry = md_entry;
> > + s->metadata_entries.present |=
> > META_LOGICAL_SECTOR_SIZE_PRESENT;
> > + continue;
> > + }
> > +
> > + if (guid_eq(md_entry.item_id, phys_sector_guid)) {
> > + s->metadata_entries.phys_sector_size_entry = md_entry;
> > + s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
> > + continue;
> > + }
> > +
> > + if (guid_eq(md_entry.item_id, parent_locator_guid)) {
> > + s->metadata_entries.parent_locator_entry = md_entry;
> > + s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
> > + continue;
> > + }
> > +
> > + if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
> > + /* cannot read vhdx file - required region table entry that
> > + * we do not understand. per spec, we must fail to open */
> > + ret = -ENOTSUP;
> > + goto exit;
> > + }
> > + }
> > +
> > + if (s->metadata_entries.present != META_ALL_PRESENT) {
> > + ret = -ENOTSUP;
> > + goto exit;
> > + }
> > +
> > + ret = bdrv_pread(bs->file,
> > + s->metadata_entries.file_parameters_entry.offset
> > + + s->metadata_rt.file_offset,
> > + &s->params,
> > + sizeof(s->params));
> > +
> > + if (ret < 0) {
> > + goto exit;
> > + }
> > +
> > + le32_to_cpus(&s->params.block_size);
> > + le32_to_cpus(&s->params.data_bits);
> > +
> > +
> > + /* We now have the file parameters, so we can tell if this is a
> > + * differencing file (i.e.. has_parent), is dynamic or fixed
> > + * sized (leave_blocks_allocated), and the block size */
> > +
> > + /* The parent locator required iff the file parameters has_parent set
> > */
> > + if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
> > + if (s->metadata_entries.present & ~META_PARENT_LOCATOR_PRESENT) {
>
> Not sure what you're trying to achieve here. We get -ENOTSUP if any
> metadata entry except the parent locator is present, and -EINVAL if
> there is none?
>
That is a mistake - that check should be sans tilde:
if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT)
There are currently 2 failure cases (while parent locators are
unsupported):
1.) There is a parent present, and a parent locator field.
- This is proper per the spec. Normally, we would parse
the parent locator field. But this is not supported, so
we return -ENOTSUP.
2.) There is a parent present, but no parent locator field.
- This is invalid, because if the parent is present the
parent locator field per spec is a required field. So,
return -EINVAL to indicate invalid field.
> Of course, this doesn't matter currently, because above we've verified
> that s->metadata_entries.present == META_ALL_PRESENT, so we'll always
> get the -ENOTSUP case.
>
Once we support parent locator fields, that check should be masked
against META_ALL_PRESENT, so that we are only checking for the base
required fields. It wouldn't hurt to go ahead and do it now.
> > + /* TODO: parse parent locator fields */
> > + ret = -ENOTSUP; /* temp, until differencing files are
> > supported */
> > + goto exit;
> > + } else {
> > + /* if has_parent is set, but there is not parent locator
> > present,
> > + * then that is an invalid combination */
> > + ret = -EINVAL;
> > + goto exit;
> > + }
> > + }
> > +
> > + /* determine virtual disk size, logical sector size,
> > + * and phys sector size */
> > +
> > + ret = bdrv_pread(bs->file,
> > + s->metadata_entries.virtual_disk_size_entry.offset
> > + + s->metadata_rt.file_offset,
> > + &s->virtual_disk_size,
> > + sizeof(uint64_t));
> > + if (ret < 0) {
> > + goto exit;
> > + }
> > + ret = bdrv_pread(bs->file,
> > + s->metadata_entries.logical_sector_size_entry.offset
> > + + s->metadata_rt.file_offset,
> > + &s->logical_sector_size,
> > + sizeof(uint32_t));
> > + if (ret < 0) {
> > + goto exit;
> > + }
> > + ret = bdrv_pread(bs->file,
> > + s->metadata_entries.phys_sector_size_entry.offset
> > + + s->metadata_rt.file_offset,
> > + &s->physical_sector_size,
> > + sizeof(uint32_t));
> > + if (ret < 0) {
> > + goto exit;
> > + }
> > +
> > + le64_to_cpus(&s->virtual_disk_size);
> > + le32_to_cpus(&s->logical_sector_size);
> > + le32_to_cpus(&s->physical_sector_size);
> > +
> > + if (s->logical_sector_size == 0 || s->params.block_size == 0) {
> > + ret = -EINVAL;
> > + goto exit;
> > + }
> > +
> > + /* both block_size and sector_size are guaranteed powers of 2 */
> > + s->sectors_per_block = s->params.block_size / s->logical_sector_size;
> > + s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
> > + (uint64_t)s->logical_sector_size /
> > + (uint64_t)s->params.block_size;
> > +
> > + /* These values are ones we will want to use for division /
> > multiplication
> > + * later on, and they are all guaranteed (per the spec) to be powers
> > of 2,
> > + * so we can take advantage of that for shift operations during
> > + * reads/writes */
> > + logical_sector_size = s->logical_sector_size;
> > + if (logical_sector_size & (logical_sector_size - 1)) {
> > + ret = -EINVAL;
> > + goto exit;
> > + }
> > +
> > + sectors_per_block = s->sectors_per_block;
> > + if (sectors_per_block & (sectors_per_block - 1)) {
> > + ret = -EINVAL;
> > + goto exit;
> > + }
> > + chunk_ratio = s->chunk_ratio;
> > + if (chunk_ratio & (chunk_ratio - 1)) {
> > + ret = -EINVAL;
> > + goto exit;
> > + }
> > + block_size = s->params.block_size;
> > + if (block_size & (block_size - 1)) {
> > + ret = -EINVAL;
> > + goto exit;
> > + }
> > +
> > + while (logical_sector_size >>= 1) {
> > + s->logical_sector_size_bits++;
> > + }
> > + while (sectors_per_block >>= 1) {
> > + s->sectors_per_block_bits++;
> > + }
> > + while (chunk_ratio >>= 1) {
> > + s->chunk_ratio_bits++;
> > + }
> > + while (block_size >>= 1) {
> > + s->block_size_bits++;
> > + }
> > +
> > + if (s->logical_sector_size != BDRV_SECTOR_SIZE) {
> > + printf("VHDX error - QEMU only supports 512 byte sector sizes\n");
>
> This is not true. It depends on the device model, and in particular the
> qdev properties of it. The block layer doesn't have access to them, so
> checking that it matches the image file isn't trivial. It would have to
> be the device model that checks that the BlockDriverState is valid for
> the device it emulates.
>
> I suggest dropping this check; otherwise make it (q)error_report at
> least.
>
OK.
> > + ret = -ENOTSUP;
> > + goto exit;
> > + }
> > +
> > + ret = 0;
> > +
> > +exit:
> > + qemu_vfree(buffer);
> > + return ret;
> > +}
>
> Kevin
[Qemu-devel] [PATCH v2 5/5] block: add header update capability for VHDX images., Jeff Cody, 2013/04/23
Re: [Qemu-devel] [PATCH v2 5/5] block: add header update capability for VHDX images., Fam Zheng, 2013/04/28