[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH for-4.2 1/4] Revert "qcow2: skip writing zero buffers to empt
From: |
Vladimir Sementsov-Ogievskiy |
Subject: |
Re: [PATCH for-4.2 1/4] Revert "qcow2: skip writing zero buffers to empty COW areas" |
Date: |
Fri, 1 Nov 2019 10:22:37 +0000 |
01.11.2019 13:00, Max Reitz wrote:
> This reverts commit c8bb23cbdbe32f5c326365e0a82e1b0e68cdcd8a.
>
> This commit causes fundamental performance problems on XFS (because
> fallocate() stalls the AIO pipeline), and as such it is not clear that
> we should unconditionally enable this behavior.
>
> We expect subclusters to alleviate the performance penalty of small
> writes to newly allocated clusters, so when we get them, the originally
> intended performance gain may actually no longer be significant.
>
> If we want to reintroduce something similar to c8bb23cbdbe, it will
> require extensive benchmarking on various systems with subclusters
> enabled.
>
> Cc: address@hidden
> Signed-off-by: Max Reitz <address@hidden>
It's sad, but OK:
Reviewed-by: Vladimir Sementsov-Ogievskiy <address@hidden>
> ---
> qapi/block-core.json | 4 +-
> block/qcow2.h | 6 ---
> block/qcow2-cluster.c | 2 +-
> block/qcow2.c | 86 --------------------------------------
> block/trace-events | 1 -
> tests/qemu-iotests/060 | 7 +---
> tests/qemu-iotests/060.out | 5 +--
> 7 files changed, 4 insertions(+), 107 deletions(-)
>
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index aa97ee2641..f053f15431 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -3304,8 +3304,6 @@
> #
> # @cor_write: a write due to copy-on-read (since 2.11)
> #
> -# @cluster_alloc_space: an allocation of file space for a cluster (since 4.1)
> -#
> # @none: triggers once at creation of the blkdebug node (since 4.1)
> #
> # Since: 2.9
> @@ -3326,7 +3324,7 @@
> 'pwritev_rmw_tail', 'pwritev_rmw_after_tail', 'pwritev',
> 'pwritev_zero', 'pwritev_done', 'empty_image_prepare',
> 'l1_shrink_write_table', 'l1_shrink_free_l2_clusters',
> - 'cor_write', 'cluster_alloc_space', 'none'] }
> + 'cor_write', 'none'] }
>
> ##
> # @BlkdebugIOType:
> diff --git a/block/qcow2.h b/block/qcow2.h
> index 601c2e4c82..8166f6e311 100644
> --- a/block/qcow2.h
> +++ b/block/qcow2.h
> @@ -418,12 +418,6 @@ typedef struct QCowL2Meta
> */
> Qcow2COWRegion cow_end;
>
> - /*
> - * Indicates that COW regions are already handled and do not require
> - * any more processing.
> - */
> - bool skip_cow;
> -
> /**
> * The I/O vector with the data from the actual guest write request.
> * If non-NULL, this is meant to be merged together with the data
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index 8982b7b762..fbfea8c817 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -809,7 +809,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta
> *m)
> assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes);
> assert(start->offset + start->nb_bytes <= end->offset);
>
> - if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) {
> + if (start->nb_bytes == 0 && end->nb_bytes == 0) {
> return 0;
> }
>
> diff --git a/block/qcow2.c b/block/qcow2.c
> index 7c18721741..17555cb0a1 100644
> --- a/block/qcow2.c
> +++ b/block/qcow2.c
> @@ -2274,11 +2274,6 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
> continue;
> }
>
> - /* If COW regions are handled already, skip this too */
> - if (m->skip_cow) {
> - continue;
> - }
> -
> /* The data (middle) region must be immediately after the
> * start region */
> if (l2meta_cow_start(m) + m->cow_start.nb_bytes != offset) {
> @@ -2305,81 +2300,6 @@ static bool merge_cow(uint64_t offset, unsigned bytes,
> return false;
> }
>
> -static bool is_unallocated(BlockDriverState *bs, int64_t offset, int64_t
> bytes)
> -{
> - int64_t nr;
> - return !bytes ||
> - (!bdrv_is_allocated_above(bs, NULL, false, offset, bytes, &nr) &&
> - nr == bytes);
> -}
> -
> -static bool is_zero_cow(BlockDriverState *bs, QCowL2Meta *m)
> -{
> - /*
> - * This check is designed for optimization shortcut so it must be
> - * efficient.
> - * Instead of is_zero(), use is_unallocated() as it is faster (but not
> - * as accurate and can result in false negatives).
> - */
> - return is_unallocated(bs, m->offset + m->cow_start.offset,
> - m->cow_start.nb_bytes) &&
> - is_unallocated(bs, m->offset + m->cow_end.offset,
> - m->cow_end.nb_bytes);
> -}
> -
> -static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
> -{
> - BDRVQcow2State *s = bs->opaque;
> - QCowL2Meta *m;
> -
> - if (!(s->data_file->bs->supported_zero_flags & BDRV_REQ_NO_FALLBACK)) {
> - return 0;
> - }
> -
> - if (bs->encrypted) {
> - return 0;
> - }
> -
> - for (m = l2meta; m != NULL; m = m->next) {
> - int ret;
> -
> - if (!m->cow_start.nb_bytes && !m->cow_end.nb_bytes) {
> - continue;
> - }
> -
> - if (!is_zero_cow(bs, m)) {
> - continue;
> - }
> -
> - /*
> - * instead of writing zero COW buffers,
> - * efficiently zero out the whole clusters
> - */
> -
> - ret = qcow2_pre_write_overlap_check(bs, 0, m->alloc_offset,
> - m->nb_clusters * s->cluster_size,
> - true);
> - if (ret < 0) {
> - return ret;
> - }
> -
> - BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
> - ret = bdrv_co_pwrite_zeroes(s->data_file, m->alloc_offset,
> - m->nb_clusters * s->cluster_size,
> - BDRV_REQ_NO_FALLBACK);
> - if (ret < 0) {
> - if (ret != -ENOTSUP && ret != -EAGAIN) {
> - return ret;
> - }
> - continue;
> - }
> -
> - trace_qcow2_skip_cow(qemu_coroutine_self(), m->offset,
> m->nb_clusters);
> - m->skip_cow = true;
> - }
> - return 0;
> -}
> -
> /*
> * qcow2_co_pwritev_task
> * Called with s->lock unlocked
> @@ -2421,12 +2341,6 @@ static coroutine_fn int
> qcow2_co_pwritev_task(BlockDriverState *bs,
> qiov_offset = 0;
> }
>
> - /* Try to efficiently initialize the physical space with zeroes */
> - ret = handle_alloc_space(bs, l2meta);
> - if (ret < 0) {
> - goto out_unlocked;
> - }
> -
> /*
> * If we need to do COW, check if it's possible to merge the
> * writing of the guest data together with that of the COW regions.
> diff --git a/block/trace-events b/block/trace-events
> index 6ba86decca..c615b26d71 100644
> --- a/block/trace-events
> +++ b/block/trace-events
> @@ -72,7 +72,6 @@ qcow2_writev_done_part(void *co, int cur_bytes) "co %p
> cur_bytes %d"
> qcow2_writev_data(void *co, uint64_t offset) "co %p offset 0x%" PRIx64
> qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p
> offset 0x%" PRIx64 " count %d"
> qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset 0x%"
> PRIx64 " count %d"
> -qcow2_skip_cow(void *co, uint64_t offset, int nb_clusters) "co %p offset
> 0x%" PRIx64 " nb_clusters %d"
>
> # qcow2-cluster.c
> qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p
> offset 0x%" PRIx64 " bytes %d"
> diff --git a/tests/qemu-iotests/060 b/tests/qemu-iotests/060
> index b91d8321bb..89e911400c 100755
> --- a/tests/qemu-iotests/060
> +++ b/tests/qemu-iotests/060
> @@ -150,15 +150,10 @@ $QEMU_IO -c "$OPEN_RO" -c "read -P 1 0 512" |
> _filter_qemu_io
> echo
> echo "=== Testing overlap while COW is in flight ==="
> echo
> -BACKING_IMG=$TEST_IMG.base
> -TEST_IMG=$BACKING_IMG _make_test_img 1G
> -
> -$QEMU_IO -c 'write 0k 64k' "$BACKING_IMG" | _filter_qemu_io
> -
> # compat=0.10 is required in order to make the following discard actually
> # unallocate the sector rather than make it a zero sector - we want COW,
> after
> # all.
> -IMGOPTS='compat=0.10' _make_test_img -b "$BACKING_IMG" 1G
> +IMGOPTS='compat=0.10' _make_test_img 1G
> # Write two clusters, the second one enforces creation of an L2 table after
> # the first data cluster.
> $QEMU_IO -c 'write 0k 64k' -c 'write 512M 64k' "$TEST_IMG" | _filter_qemu_io
> diff --git a/tests/qemu-iotests/060.out b/tests/qemu-iotests/060.out
> index 0f6b0658a1..e42bf8c5a9 100644
> --- a/tests/qemu-iotests/060.out
> +++ b/tests/qemu-iotests/060.out
> @@ -97,10 +97,7 @@ read 512/512 bytes at offset 0
>
> === Testing overlap while COW is in flight ===
>
> -Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=1073741824
> -wrote 65536/65536 bytes at offset 0
> -64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> -Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
> backing_file=TEST_DIR/t.IMGFMT.base
> +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
> wrote 65536/65536 bytes at offset 0
> 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> wrote 65536/65536 bytes at offset 536870912
>
--
Best regards,
Vladimir
[PATCH for-4.2 2/4] block: Make wait/mark serialising requests public, Max Reitz, 2019/11/01
[PATCH for-4.2 3/4] block: Add bdrv_co_get_self_request(), Max Reitz, 2019/11/01
[PATCH for-4.2 4/4] block/file-posix: Let post-EOF fallocate serialize, Max Reitz, 2019/11/01
Re: [PATCH for-4.2 0/4] qcow2: Fix data corruption on XFS, Max Reitz, 2019/11/01