[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-block] [PATCH v3 6/6] vmdk: Update metadata for multiple clusters
From: |
Ashijeet Acharya |
Subject: |
Re: [Qemu-block] [PATCH v3 6/6] vmdk: Update metadata for multiple clusters |
Date: |
Sat, 22 Apr 2017 09:43:25 +0530 |
On Fri, Apr 21, 2017 at 1:45 PM, Fam Zheng <address@hidden> wrote:
> On Sat, 04/01 20:14, Ashijeet Acharya wrote:
>> Include a next pointer in VmdkMetaData struct to point to the previous
>> allocated L2 table. Modify vmdk_L2update to start updating metadata for
>> allocation of multiple clusters at once.
>>
>> Signed-off-by: Ashijeet Acharya <address@hidden>
>
> This is the metadata part of the coalesced allocation. I think patch 3 is
> functionally incomplete without these changes, and is perhaps broken because
> metadata is not handled correctly.
>
> Such an "intermediate functional regression" is not good in a series, which we
> need to avoid.
I have moved this patch right after patch 3 because merging both will
result in an unnecessarily huge patch. Will that work?
>
>> ---
>> block/vmdk.c | 136
>> ++++++++++++++++++++++++++++++++++++++++++++++++-----------
>> 1 file changed, 111 insertions(+), 25 deletions(-)
>>
>> diff --git a/block/vmdk.c b/block/vmdk.c
>> index 9456ddd..c7675db 100644
>> --- a/block/vmdk.c
>> +++ b/block/vmdk.c
>> @@ -137,6 +137,8 @@ typedef struct VmdkMetaData {
>> int valid;
>> uint32_t *l2_cache_entry;
>> uint32_t nb_clusters;
>> + uint32_t offset;
>> + struct VmdkMetaData *next;
>> } VmdkMetaData;
>>
>> typedef struct VmdkGrainMarker {
>> @@ -263,6 +265,12 @@ static inline uint64_t size_to_clusters(VmdkExtent
>> *extent, uint64_t size)
>> return (DIV_ROUND_UP(size + round_off_size, BDRV_SECTOR_SIZE * 128) -
>> 1);
>> }
>>
>> +static inline int64_t vmdk_align_offset(int64_t offset, int n)
>> +{
>> + offset = (offset + n - 1) & ~(n - 1);
>> + return offset;
>> +}
>> +
>> static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
>> {
>> char *desc;
>> @@ -1037,29 +1045,88 @@ static void vmdk_refresh_limits(BlockDriverState
>> *bs, Error **errp)
>> }
>> }
>>
>> -static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
>> - uint32_t offset)
>> +static int vmdk_alloc_cluster_link_l2(VmdkExtent *extent,
>> + VmdkMetaData *m_data, bool zeroed)
>> {
>> - offset = cpu_to_le32(offset);
>> + int i;
>> + uint32_t offset, temp_offset;
>> + int *l2_table_array;
>> + int l2_array_size;
>> +
>> + if (zeroed) {
>> + temp_offset = VMDK_GTE_ZEROED;
>> + } else {
>> + temp_offset = m_data->offset;
>> + }
>> +
>> + temp_offset = cpu_to_le32(temp_offset);
>> +
>> + l2_array_size = sizeof(uint32_t) * m_data->nb_clusters;
>> + l2_table_array = qemu_try_blockalign(extent->file->bs,
>> + vmdk_align_offset(l2_array_size, 512));
>
> Indentation is off.
>
> Use QEMU_ALIGN_UP, instead of vmdk_align_offset.
>
> 512 is a magic number, use BDRV_SECTOR_SIZE.
Done
>
>> + if (l2_table_array == NULL) {
>> + return VMDK_ERROR;
>> + }
>> + memset(l2_table_array, 0, vmdk_align_offset(l2_array_size, 512));
>> +
>> /* update L2 table */
>> + offset = temp_offset;
>> + for (i = 0; i < m_data->nb_clusters; i++) {
>> + l2_table_array[i] = offset;
>> + if (!zeroed) {
>> + offset += 128;
>
> Something is going wrong here with endianness on BE host, I believe.
I have changed temp_offset to LE above; wouldn't that be enough? I am not sure.
>
>> + }
>> + }
>> +
>> if (bdrv_pwrite_sync(extent->file,
>> - ((int64_t)m_data->l2_offset * 512)
>> - + (m_data->l2_index * sizeof(offset)),
>> - &offset, sizeof(offset)) < 0) {
>> + ((int64_t)m_data->l2_offset * 512)
>> + + ((m_data->l2_index) * sizeof(offset)),
>> + l2_table_array, l2_array_size) < 0) {
>
> You can fix the indentation while changing these lines. If not, don't change
> it,
> or at least don't make it uglier.
I have aligned it, if it still looks ugly in v4, I will revert.
>
>> return VMDK_ERROR;
>> }
>> +
>> /* update backup L2 table */
>> if (extent->l1_backup_table_offset != 0) {
>> m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
>> if (bdrv_pwrite_sync(extent->file,
>> ((int64_t)m_data->l2_offset * 512)
>> - + (m_data->l2_index * sizeof(offset)),
>> - &offset, sizeof(offset)) < 0) {
>> + + ((m_data->l2_index) * sizeof(offset)),
>> + l2_table_array, l2_array_size) < 0) {
>
> Same here.
>
>> return VMDK_ERROR;
>> }
>> }
>> +
>> + offset = temp_offset;
>> if (m_data->l2_cache_entry) {
>> - *m_data->l2_cache_entry = offset;
>> + for (i = 0; i < m_data->nb_clusters; i++) {
>> + *m_data->l2_cache_entry = offset;
>> + m_data->l2_cache_entry++;
>> +
>> + if (!zeroed) {
>> + offset += 128;
>> + }
>> + }
>> + }
>> +
>> + qemu_vfree(l2_table_array);
>> + return VMDK_OK;
>> +}
>> +
>> +static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
>> + bool zeroed)
>> +{
>> + int ret;
>> +
>> + while (m_data->next != NULL) {
>> + VmdkMetaData *next;
>> +
>> + ret = vmdk_alloc_cluster_link_l2(extent, m_data, zeroed);
>> + if (ret < 0) {
>> + return ret;
>> + }
>> +
>> + next = m_data->next;
>> + m_data = next;
>
> Why not simply "m_data = m_data->next" and drop "next" variable?
>> }
>>
>> return VMDK_OK;
>> @@ -1271,7 +1338,7 @@ exit:
>> */
>> static int handle_alloc(BlockDriverState *bs, VmdkExtent *extent,
>> uint64_t offset, uint64_t *cluster_offset,
>> - int64_t *bytes, VmdkMetaData *m_data,
>> + int64_t *bytes, VmdkMetaData **m_data,
>> bool allocate, uint32_t *total_alloc_clusters)
>> {
>> int l1_index, l2_offset, l2_index;
>> @@ -1280,6 +1347,7 @@ static int handle_alloc(BlockDriverState *bs,
>> VmdkExtent *extent,
>> uint32_t nb_clusters;
>> bool zeroed = false;
>> uint64_t skip_start_bytes, skip_end_bytes;
>> + VmdkMetaData *old_m_data;
>> int ret;
>>
>> ret = get_cluster_table(extent, offset, &l1_index, &l2_offset,
>> @@ -1330,13 +1398,21 @@ static int handle_alloc(BlockDriverState *bs,
>> VmdkExtent *extent,
>> if (ret < 0) {
>> return ret;
>> }
>> - if (m_data) {
>> - m_data->valid = 1;
>> - m_data->l1_index = l1_index;
>> - m_data->l2_index = l2_index;
>> - m_data->l2_offset = l2_offset;
>> - m_data->l2_cache_entry = &l2_table[l2_index];
>> - m_data->nb_clusters = nb_clusters;
>> +
>> + if (*m_data) {
>> + old_m_data = *m_data;
>> + *m_data = g_malloc0(sizeof(**m_data));
>> +
>> + **m_data = (VmdkMetaData) {
>> + .valid = 1,
>> + .l1_index = l1_index,
>> + .l2_index = l2_index,
>> + .l2_offset = l2_offset,
>> + .l2_cache_entry = &l2_table[l2_index],
>> + .nb_clusters = nb_clusters,
>> + .offset = cluster_sector,
>> + .next = old_m_data,
>> + };
>
> I think if the new m_data can be merged into the old, there is no need to
> allocate a new one.
Do you mean that if the clusters lie in the same l2 table, then merge
them? I think this case only appears when I leave out the first and
last cluster for COW. If I misunderstood, sorry!
I think I will post v4 without attending this issue and we can discuss
this when you are available after the weekend.
>
>> }
>> }
>> *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
>> @@ -1365,7 +1441,7 @@ static int handle_alloc(BlockDriverState *bs,
>> VmdkExtent *extent,
>> */
>> static int vmdk_alloc_cluster_offset(BlockDriverState *bs,
>> VmdkExtent *extent,
>> - VmdkMetaData *m_data, uint64_t offset,
>> + VmdkMetaData **m_data, uint64_t offset,
>> bool allocate, uint64_t
>> *cluster_offset,
>> int64_t bytes,
>> uint32_t *total_alloc_clusters)
>> @@ -1385,8 +1461,8 @@ static int vmdk_alloc_cluster_offset(BlockDriverState
>> *bs,
>> new_cluster_offset = 0;
>> *cluster_offset = 0;
>> n_bytes = 0;
>> - if (m_data) {
>> - m_data->valid = 0;
>> + if (*m_data) {
>> + (*m_data)->valid = 0;
>> }
>>
>> /* due to L2 table margins all bytes may not get allocated at once */
>> @@ -1768,9 +1844,11 @@ static int vmdk_pwritev(BlockDriverState *bs,
>> uint64_t offset,
>> uint64_t cluster_offset;
>> uint64_t bytes_done = 0;
>> uint64_t extent_size;
>> - VmdkMetaData m_data;
>> + VmdkMetaData *m_data;
>> uint32_t total_alloc_clusters = 0;
>>
>> + m_data = g_malloc0(sizeof(*m_data));
>> +
[scroll till here] [1] So this allocation will need to move....[2]
>> if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
>> error_report("Wrong offset: offset=0x%" PRIx64
>> " total_sectors=0x%" PRIx64,
>> @@ -1779,6 +1857,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t
>> offset,
>> }
>>
>> while (bytes > 0) {
....[2] here. Thus we would need to allocate it again every time we
enter here; otherwise the very next line, m_data->next = NULL, will
segfault.
So maybe it's good to free it separately?
I will retain it this way for v4 and change it otherwise if you still
say so after my reasoning in v5.
>> + m_data->next = NULL;
>> extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
>> if (!extent) {
>> return -EIO;
>> @@ -1825,7 +1904,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t
>> offset,
>> total_alloc_clusters;
>> if (!zero_dry_run) {
>> /* update L2 tables */
>> - if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
>> + if (vmdk_L2update(extent, m_data, zeroed)
>> != VMDK_OK) {
>> return -EIO;
>> }
>> @@ -1839,10 +1918,9 @@ static int vmdk_pwritev(BlockDriverState *bs,
>> uint64_t offset,
>> if (ret) {
>> return ret;
>> }
>> - if (m_data.valid) {
>> + if (m_data->valid) {
>> /* update L2 tables */
>> - if (vmdk_L2update(extent, &m_data,
>> - cluster_offset >> BDRV_SECTOR_BITS)
>> + if (vmdk_L2update(extent, m_data, zeroed)
>> != VMDK_OK) {
>> return -EIO;
>> }
>> @@ -1852,6 +1930,13 @@ static int vmdk_pwritev(BlockDriverState *bs,
>> uint64_t offset,
>> offset += n_bytes;
>> bytes_done += n_bytes;
>>
>> + while (m_data->next != NULL) {
>> + VmdkMetaData *next;
>> + next = m_data->next;
>> + g_free(m_data);
>> + m_data = next;
>> + }
>> +
>> /* update CID on the first write every time the virtual disk is
>> * opened */
>> if (!s->cid_updated) {
>> @@ -1862,6 +1947,7 @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t
>> offset,
>> s->cid_updated = true;
>> }
>> }
>> + g_free(m_data);
>
> This is weird, you free all but the last m_data with a while loop, a few lines
> above, and this one with a separate g_free().
>
> Please use one loop:
>
> for (p = m_data; p; p = next) {
> next = p->next;
> g_free(p);
> }
I have a good (maybe good enough) reason for it: if I free it in the
while loop above, then I will need to allocate it again when we enter
the outer while (bytes > 0) loop; otherwise we will segfault for
everything from that point onwards....[scroll up __^] [1]
Ashijeet
[Qemu-block] [PATCH v3 6/6] vmdk: Update metadata for multiple clusters, Ashijeet Acharya, 2017/04/01