From: Jun Li
Subject: Re: [Qemu-devel] [PATCH v3 1/2] qcow2: Add qcow2_shrink_l1_and_l2_table for qcow2 shrinking
Date: Wed, 15 Oct 2014 01:58:36 +0800
User-agent: Mutt/1.5.23 (2014-03-12)

Please ignore this patch; I have submitted v4. Thanks.

On Mon, 10/13 13:04, Jun Li wrote:
> This patch adds the new function qcow2_shrink_l1_and_l2_table, which
> shrinks or discards the L1 and L2 tables when a qcow2 image is shrunk.
> 
> Signed-off-by: Jun Li <address@hidden>
> ---
> Compared to v2, v3 fixes a host cluster leak.
> ---
>  block/qcow2-cluster.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  block/qcow2.c         |  40 ++++++++++--
>  block/qcow2.h         |   2 +
>  3 files changed, 211 insertions(+), 4 deletions(-)
> 
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index f7dd8c0..2ac3536 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -29,6 +29,9 @@
>  #include "block/qcow2.h"
>  #include "trace.h"
>  
> +static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
> +                   uint64_t **l2_table);
> +
>  int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
>                          bool exact_size)
>  {
> @@ -135,6 +138,176 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
>      return ret;
>  }
>  
> +int qcow2_shrink_l1_and_l2_table(BlockDriverState *bs, uint64_t new_l1_size,
> +                                 int new_l2_index, bool exact_size)
> +{
> +    BDRVQcowState *s = bs->opaque;
> +    int new_l1_size2, ret, i;
> +    uint64_t *new_l1_table;
> +    int64_t new_l1_table_offset;
> +    int64_t old_l1_table_offset, old_l1_size;
> +    uint8_t data[12];
> +
> +    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
> +    new_l1_table = qemu_try_blockalign(bs->file,
> +                                       align_offset(new_l1_size2, 512));
> +    if (new_l1_table == NULL) {
> +        return -ENOMEM;
> +    }
> +    memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
> +
> +    /* shrinking l1 table */
> +    memcpy(new_l1_table, s->l1_table, new_l1_size2);
> +
> +    /* write new table (align to cluster) */
> +    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
> +    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
> +    if (new_l1_table_offset < 0) {
> +        qemu_vfree(new_l1_table);
> +        return new_l1_table_offset;
> +    }
> +
> +    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    /* the L1 position has not yet been updated, so these clusters must
> +     * indeed be completely free */
> +    ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
> +                                        new_l1_size2);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
> +
> +    for (i = 0; i < new_l1_size; i++) {
> +        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
> +    }
> +
> +    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset,
> +                           new_l1_table, new_l1_size2);
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    for (i = 0; i < new_l1_size; i++) {
> +        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
> +    }
> +
> +    /* set new table */
> +    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
> +    cpu_to_be32w((uint32_t *)data, new_l1_size);
> +    stq_be_p(data + 4, new_l1_table_offset);
> +    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
> +                           data, sizeof(data));
> +    if (ret < 0) {
> +        goto fail;
> +    }
> +
> +    old_l1_table_offset = s->l1_table_offset;
> +    s->l1_table_offset = new_l1_table_offset;
> +    uint64_t *old_l1_table = s->l1_table;
> +    s->l1_table = new_l1_table;
> +    old_l1_size = s->l1_size;
> +    s->l1_size = new_l1_size;
> +
> +    int num = old_l1_size - s->l1_size;
> +
> +    while (num >= 0) {
> +        uint64_t l2_offset;
> +        int ret;
> +        uint64_t *l2_table, l2_entry;
> +        int last_free_cluster = 0;
> +
> +        l2_offset = old_l1_table[s->l1_size + num - 1] & L1E_OFFSET_MASK;
> +        if (l2_offset == 0) {
> +            goto retry;
> +        }
> +
> +        if (num == 0) {
> +            if (new_l2_index == 0) {
> +                goto retry;
> +            }
> +            last_free_cluster = new_l2_index + 1;
> +        }
> +
> +        /* load l2_table into cache */
> +        ret = l2_load(bs, l2_offset, &l2_table);
> +
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +
> +        for (i = s->l2_size; i > last_free_cluster; i--) {
> +            l2_entry = be64_to_cpu(l2_table[i - 1]);
> +
> +            switch (qcow2_get_cluster_type(l2_entry)) {
> +            case QCOW2_CLUSTER_UNALLOCATED:
> +                if (!bs->backing_hd) {
> +                    continue;
> +                }
> +                break;
> +
> +            case QCOW2_CLUSTER_ZERO:
> +                continue;
> +
> +            case QCOW2_CLUSTER_NORMAL:
> +            case QCOW2_CLUSTER_COMPRESSED:
> +                break;
> +
> +            default:
> +                abort();
> +            }
> +
> +            qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
> +            if (s->qcow_version >= 3) {
> +                l2_table[i - 1] = cpu_to_be64(QCOW_OFLAG_ZERO);
> +            } else {
> +                l2_table[i - 1] = cpu_to_be64(0);
> +            }
> +
> +            /* Before discarding the cluster this l2_entry refers to,
> +             * set the entry to 0 on disk.
> +             */
> +            uint64_t data64 = cpu_to_be64(0);
> +            ret = bdrv_pwrite_sync(bs->file,
> +                                   l2_offset + (i-1) * sizeof(uint64_t),
> +                                   &data64, sizeof(data64));
> +            if (ret < 0) {
> +                goto fail;
> +            }
> +
> +            /* Then decrease the refcount */
> +            qcow2_free_any_clusters(bs, l2_entry, 1, QCOW2_DISCARD_MAX);
> +        }
> +
> +        ret = qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
> +        if (ret < 0) {
> +            goto fail;
> +        }
> +        if (last_free_cluster == 0) {
> +            qemu_vfree(l2_table);
> +            qcow2_free_clusters(bs, l2_offset, s->cluster_size - 1,
> +                                QCOW2_DISCARD_OTHER);
> +        }
> +retry:
> +        num--;
> +    }
> +
> +    qemu_vfree(old_l1_table);
> +
> +    qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t),
> +                        QCOW2_DISCARD_OTHER);
> +    return 0;
> + fail:
> +    qemu_vfree(new_l1_table);
> +    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
> +                        QCOW2_DISCARD_OTHER);
> +    return ret;
> +}
> +
>  /*
>   * l2_load
>   *
> diff --git a/block/qcow2.c b/block/qcow2.c
> index fb28493..4dfc15c 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -2097,7 +2097,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
>      int64_t new_l1_size;
>      int ret;
>  
> -    if (offset & 511) {
> +    if (offset <= 0 || offset & 511) {
>          error_report("The new size must be a multiple of 512");
>          return -EINVAL;
>      }
> @@ -2108,10 +2108,42 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
>          return -ENOTSUP;
>      }
>  
> -    /* shrinking is currently not supported */
> +    /* shrinking image */
>      if (offset < bs->total_sectors * 512) {
> -        error_report("qcow2 doesn't support shrinking images yet");
> -        return -ENOTSUP;
> +
> +        /* The L1 table, L2 tables, refcount table, refcount blocks
> +         * and the image header all occupy clusters of their own,
> +         * so the offset needs to be reduced accordingly.
> +         */
> +        int64_t nb_l2 = offset / (s->l2_size << s->cluster_bits);
> +        int64_t nb_l1 = (s->l1_size >> 3) >> s->cluster_bits;
> +        int64_t nb_refcount_block_table = offset / ((s->cluster_size >>
> +                                          REFCOUNT_SHIFT) << s->cluster_bits);
> +        int64_t nb_refcount_table = (nb_refcount_block_table << 3) >>
> +                                     s->cluster_bits;
> +        int64_t total_nb = nb_l2 + nb_l1 + nb_refcount_block_table +
> +                           nb_refcount_table + 1 + 1000;
> +        int64_t offset_for_shrink = offset - (total_nb << s->cluster_bits);
> +        int new_l2_index = offset_to_l2_index(s, offset_for_shrink);
> +
> +        new_l1_size = size_to_l1(s, offset_for_shrink);
> +        ret = qcow2_shrink_l1_and_l2_table(bs, new_l1_size, new_l2_index, true);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +
> +        int64_t actual_size = bdrv_get_allocated_file_size(bs);
> +
> +        if (offset < actual_size) {
> +            int fd = qemu_open(bs->filename, O_WRONLY,
> +                               O_APPEND);
> +            if (fd >= 0) {
> +                if (ftruncate(fd, offset) < 0) {
> +                    return -errno;
> +                }
> +                qemu_close(fd);
> +            }
> +        }
>      }
>  
>      new_l1_size = size_to_l1(s, offset);
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 7d61e61..a9c8acb 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -516,6 +516,8 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
>  /* qcow2-cluster.c functions */
>  int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
>                          bool exact_size);
> +int qcow2_shrink_l1_and_l2_table(BlockDriverState *bs, uint64_t new_l1_size,
> +                                 int new_l2_index, bool exact_size);
>  int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
>  void qcow2_l2_cache_reset(BlockDriverState *bs);
>  int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
> -- 
> 1.9.3
> 
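
A note for readers following the arithmetic in the qcow2_truncate() hunk above:
new_l1_size and new_l2_index come from the size_to_l1() and offset_to_l2_index()
inline helpers in block/qcow2.h. The standalone sketch below re-implements those
two helpers (parameterized by cluster_bits only, using l2_bits = cluster_bits - 3)
so the numbers can be checked by hand. The 64 KiB cluster size and the 3000 MiB
shrink target are assumed values for illustration, not taken from the patch, and
the patch itself applies the helpers to offset_for_shrink (the requested size
minus its rough metadata estimate), not to the raw size used here.

    /* Standalone sketch, not part of the patch. */
    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors size_to_l1() from block/qcow2.h: number of L1 entries needed
     * to map `size` guest bytes for a given cluster_bits. */
    static int64_t size_to_l1(int cluster_bits, int64_t size)
    {
        int shift = cluster_bits + (cluster_bits - 3);   /* cluster_bits + l2_bits */
        return (size + (1LL << shift) - 1) >> shift;
    }

    /* Mirrors offset_to_l2_index() from block/qcow2.h: index of the entry
     * that maps `offset` within its L2 table. */
    static int offset_to_l2_index(int cluster_bits, int64_t offset)
    {
        int l2_size = 1 << (cluster_bits - 3);   /* 8-byte entries per L2 table */
        return (int)((offset >> cluster_bits) & (l2_size - 1));
    }

    int main(void)
    {
        int cluster_bits = 16;                    /* assumed default: 64 KiB clusters */
        int64_t new_size = 3000LL * 1024 * 1024;  /* hypothetical shrink target */

        printf("bytes mapped by one L2 table: %lld\n",
               1LL << (2 * cluster_bits - 3));
        printf("new_l1_size : %lld\n",
               (long long)size_to_l1(cluster_bits, new_size));
        printf("new_l2_index: %d\n",
               offset_to_l2_index(cluster_bits, new_size));
        return 0;
    }

qcow2_truncate() itself is reached through bdrv_truncate(), for example from
qemu-img resize with a smaller size.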


