qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 3/4] qcow2: add shrink image support


From: Pavel Butsykin
Subject: Re: [Qemu-devel] [PATCH v2 3/4] qcow2: add shrink image support
Date: Thu, 22 Jun 2017 16:57:07 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.1.1


On 22.06.2017 01:55, Max Reitz wrote:
On 2017-06-13 14:16, Pavel Butsykin wrote:
This patch adds shrinking of the image file for qcow2. As a result, this allows
us to reduce the virtual image size and free up space on the disk without
copying the image. Image can be fragmented and shrink is done by punching holes
in the image file.

Signed-off-by: Pavel Butsykin <address@hidden>
---
  block/qcow2-cluster.c  | 42 ++++++++++++++++++++++++++++++++
  block/qcow2-refcount.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++
  block/qcow2.c          | 40 +++++++++++++++++++++++--------
  block/qcow2.h          |  2 ++
  qapi/block-core.json   |  3 ++-
  5 files changed, 141 insertions(+), 11 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index d779ea19cf..a84b7e607e 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -32,6 +32,48 @@
  #include "qemu/bswap.h"
  #include "trace.h"
+int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size)

It's not really a max_size but always an exact size. You don't want it
to be any smaller than this.

+{
+    BDRVQcow2State *s = bs->opaque;
+    int new_l1_size, i, ret;
+
+    if (max_size >= s->l1_size) {
+        return 0;
+    }
+
+    new_l1_size = max_size;
+
+#ifdef DEBUG_ALLOC2
+    fprintf(stderr, "shrink l1_table from %d to %" PRId64 "\n",
+            s->l1_size, new_l1_size);

new_l1_size is of type int, not int64_t.

+#endif
+
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
+    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset +
+                                       sizeof(uint64_t) * new_l1_size,
+                             (s->l1_size - new_l1_size) * sizeof(uint64_t), 0);
+    if (ret < 0) {
+        return ret;
+    }
+
+    ret = bdrv_flush(bs->file->bs);
+    if (ret < 0) {
+        return ret;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
+    for (i = s->l1_size - 1; i > new_l1_size - 1; i--) {
+        if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) {
+            continue;
+        }
+        qcow2_free_clusters(bs, s->l1_table[i] & L1E_OFFSET_MASK,
+                            s->l2_size * sizeof(uint64_t),

I'm more of a fan of s->cluster_size instead of s->l2_size *
sizeof(uint64_t) but it's not like it matters...

+                            QCOW2_DISCARD_ALWAYS);
+        s->l1_table[i] = 0;

I'd probably clear the overhanging s->l1_table entries before
bdrv_flush() (because you shouldn't really use them after
bdrv_pwrite_zeroes() has returned, even if bdrv_flush() has failed), but
it's not absolutely necessary. As long as they still have a refcount of
at least one, writing to them will just be useless but not destroy any data.


You're right, but if it's not necessary, I would prefer to leave it as is,
simply because the overhanging s->l1_table entries are used to release the clusters :)

+    }
+    return 0;
+}
+
  int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                          bool exact_size)
  {
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 576ab551d6..e98306acd8 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -29,6 +29,7 @@
  #include "block/qcow2.h"
  #include "qemu/range.h"
  #include "qemu/bswap.h"
+#include "qemu/cutils.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
  static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
@@ -2936,3 +2937,67 @@ done:
      qemu_vfree(new_refblock);
      return ret;
  }
+
+int qcow2_shrink_reftable(BlockDriverState *bs)
+{
+    BDRVQcow2State *s = bs->opaque;
+    uint64_t *reftable_tmp =
+        g_try_malloc(sizeof(uint64_t) * s->refcount_table_size);
+    int i, ret;
+
+    if (s->refcount_table_size && reftable_tmp == NULL) {
+        return -ENOMEM;
+    }
+
+    for (i = 0; i < s->refcount_table_size; i++) {
+        int64_t refblock_offs = s->refcount_table[i] & REFT_OFFSET_MASK;
+        void *refblock;
+        bool unused_block;
+
+        if (refblock_offs == 0) {
+            reftable_tmp[i] = 0;
+            continue;
+        }
+        ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offs,
+                              &refblock);
+        if (ret < 0) {
+            goto out;
+        }
+
+        /* the refblock has own reference */
+        if (i == refblock_offs >> (s->refcount_block_bits + s->cluster_bits)) {
+            uint64_t blk_index = (refblock_offs >> s->cluster_bits) &
+                                 (s->refcount_block_size - 1);
+            uint64_t refcount = s->get_refcount(refblock, blk_index);
+
+            s->set_refcount(refblock, blk_index, 0);
+
+            unused_block = buffer_is_zero(refblock, s->refcount_block_size);

s/refcount_block_size/cluster_size/

+
+            s->set_refcount(refblock, blk_index, refcount);
+        } else {
+            unused_block = buffer_is_zero(refblock, s->refcount_block_size);

Same here.

+        }
+        qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+
+        reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
+    }
+
+    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp,
+                           sizeof(uint64_t) * s->refcount_table_size);
+    if (ret < 0) {
+        goto out;
+    }
+
+    for (i = 0; i < s->refcount_table_size; i++) {
+        if (s->refcount_table[i] && !reftable_tmp[i]) {
+            qcow2_free_clusters(bs, s->refcount_table[i] & REFT_OFFSET_MASK,
+                                s->cluster_size, QCOW2_DISCARD_ALWAYS);

This doesn't feel like a very good idea. The bdrv_pwrite_sync() before
has brought the on-disk refcount structures into a different state than
what we have cached.

It is for this reason that, inside qcow2_free_clusters()->update_refcount(), the cache
is discarded by qcow2_cache_discard().

OTOH, the bdrv_pwrite_sync() has accessed only the reftable and this
should only access refblocks. So I cannot think of any way this might
actually do something bad. But I guess it'll be better for me to revisit
this when it's not in the middle of the night (so on Friday).

+            s->refcount_table[i] = 0;
+        }
+    }
+
+out:
+    g_free(reftable_tmp);
+    return ret;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index b3ba5daa93..0ad46d2776 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2545,6 +2545,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset, Error **errp)
  {
      BDRVQcow2State *s = bs->opaque;
      int64_t new_l1_size;
+    uint64_t total_size;
      int ret;
if (offset & 511) {
@@ -2558,17 +2559,36 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t 
offset, Error **errp)
          return -ENOTSUP;
      }
- /* shrinking is currently not supported */
-    if (offset < bs->total_sectors * 512) {
-        error_setg(errp, "qcow2 doesn't support shrinking images yet");
-        return -ENOTSUP;
-    }
-
      new_l1_size = size_to_l1(s, offset);
-    ret = qcow2_grow_l1_table(bs, new_l1_size, true);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to grow the L1 table");
-        return ret;
+    total_size = bs->total_sectors << BDRV_SECTOR_BITS;
+
+    if (offset < total_size) {
+        ret = qcow2_cluster_discard(bs, ROUND_UP(offset, s->cluster_size),
+                                    total_size - ROUND_UP(offset,
+                                                          s->cluster_size),
+                                    QCOW2_DISCARD_ALWAYS, true);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to discard reduced clasters");

s/clasters/clusters/

And maybe "truncated", "stripped", or "cropped" instead of "reduced"?

+            return ret;
+        }
+
+        ret = qcow2_shrink_l1_table(bs, new_l1_size);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to reduce the L1 table");

s/reduce/shrink/ (or "truncate"; or "reduce the L1 table size")

Also, to be fair, you're actually reducing the number of L2 tables, not
the size of the L1 table. (But that's a nit pick)

In the previous patch version, the L1 table size really was being reduced :)
I think it's now better to fix the error message.

+            return ret;
+        }
+
+        ret = qcow2_shrink_reftable(bs);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to shrink the refcount 
table");

And this is not really shrinking the reftable but instead discarding
some refblocks (potentially). (This is a nit pick, too)

Max

+            return ret;
+        }
+    } else {
+        ret = qcow2_grow_l1_table(bs, new_l1_size, true);
+        if (ret < 0) {
+            error_setg_errno(errp, -ret, "Failed to grow the L1 table");
+            return ret;
+        }
      }
/* write updated header.size */
diff --git a/block/qcow2.h b/block/qcow2.h
index 07faa6dc78..600463bf8e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -531,10 +531,12 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, 
int ign, int64_t offset,
  int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
                                  BlockDriverAmendStatusCB *status_cb,
                                  void *cb_opaque, Error **errp);
+int qcow2_shrink_reftable(BlockDriverState *bs);
/* qcow2-cluster.c functions */
  int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                          bool exact_size);
+int qcow2_shrink_l1_table(BlockDriverState *bs, uint64_t max_size);
  int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
  int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
  int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
diff --git a/qapi/block-core.json b/qapi/block-core.json
index f85c2235c7..bcbffa3339 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2372,7 +2372,8 @@
              'cluster_alloc_bytes', 'cluster_free', 'flush_to_os',
              'flush_to_disk', 'pwritev_rmw_head', 'pwritev_rmw_after_head',
              'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
-            'pwritev_zero', 'pwritev_done', 'empty_image_prepare' ] }
+            'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
+            'l1_shrink_write_table', 'l1_shrink_free_l2_clusters' ] }
##
  # @BlkdebugInjectErrorOptions:






reply via email to

[Prev in Thread] Current Thread [Next in Thread]