[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v3 04/11] block: Reschedule query-block during qcow2 invalidation
From: |
Fabiano Rosas |
Subject: |
[PATCH v3 04/11] block: Reschedule query-block during qcow2 invalidation |
Date: |
Tue, 9 Apr 2024 11:59:10 -0300 |
There is a small window at the end of block device migration when
devices are being re-activated. This includes a resetting of some
fields of BDRVQcow2State at qcow2_co_invalidate_cache(). A concurrent
QMP query-block command can call qcow2_get_specific_info() during this
window and see the cleared values, which leads to an assert:
qcow2_get_specific_info: Assertion `false' failed
This is the same issue as Gitlab #1933, which has already been
resolved[1], but there the fix applied only to non-coroutine
commands. Once we move query-block to a coroutine the problem will
manifest again.
Add an operation blocker to the invalidation function to block the
query info path during this window.
Instead of failing query-block, which would be disruptive to users,
use the blocker to know when to reschedule the coroutine back into the
iohandler so it doesn't run while the BDRVQcow2State is inconsistent.
To avoid failing query-block when all block operations are blocked,
unblock the INFO operation at various places. This preserves the prior
situations where query-block used to work.
1 - https://gitlab.com/qemu-project/qemu/-/issues/1933
Signed-off-by: Fabiano Rosas <farosas@suse.de>
---
block.c | 1 +
block/mirror.c | 1 +
block/qcow2.c | 20 ++++++++++++++++++++
block/replication.c | 1 +
blockjob.c | 1 +
include/block/block-common.h | 1 +
migration/block.c | 1 +
7 files changed, 26 insertions(+)
diff --git a/block.c b/block.c
index 468cf5e67d..01478c5471 100644
--- a/block.c
+++ b/block.c
@@ -1296,6 +1296,7 @@ static void GRAPH_WRLOCK bdrv_backing_attach(BdrvChild *c)
parent->backing_blocker);
bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_BACKUP_TARGET,
parent->backing_blocker);
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_INFO, parent->backing_blocker);
}
static void bdrv_backing_detach(BdrvChild *c)
diff --git a/block/mirror.c b/block/mirror.c
index 1bdce3b657..9f952f3fdd 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -1191,6 +1191,7 @@ static void mirror_complete(Job *job, Error **errp)
error_setg(&s->replace_blocker,
"block device is in use by block-job-complete");
bdrv_op_block_all(s->to_replace, s->replace_blocker);
+ bdrv_op_unblock(s->to_replace, BLOCK_OP_TYPE_INFO, s->replace_blocker);
bdrv_ref(s->to_replace);
}
diff --git a/block/qcow2.c b/block/qcow2.c
index 956128b409..b0ec8009e3 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2834,6 +2834,7 @@ qcow2_co_invalidate_cache(BlockDriverState *bs, Error
**errp)
BdrvChild *data_file;
int flags = s->flags;
QCryptoBlock *crypto = NULL;
+ Error *blocker = NULL;
QDict *options;
int ret;
@@ -2845,6 +2846,17 @@ qcow2_co_invalidate_cache(BlockDriverState *bs, Error
**errp)
crypto = s->crypto;
s->crypto = NULL;
+ /*
+ * When qcow2_do_open() below reads the qcow header, it yields to
+ * wait for the I/O which allows a concurrent QMP query-block
+ * command to be dispatched on the same context before
+ * BDRVQcow2State has been completely repopulated. Block the
+ * query-info operation during this window to avoid having
+ * qcow2_get_specific_info() access bogus values.
+ */
+ error_setg(&blocker, "invalidating cached metadata");
+ bdrv_op_block(bs, BLOCK_OP_TYPE_INFO, blocker);
+
/*
* Do not reopen s->data_file (i.e., have qcow2_do_close() not close it,
* and then prevent qcow2_do_open() from opening it), because this function
@@ -2864,6 +2876,8 @@ qcow2_co_invalidate_cache(BlockDriverState *bs, Error
**errp)
qemu_co_mutex_lock(&s->lock);
ret = qcow2_do_open(bs, options, flags, false, errp);
qemu_co_mutex_unlock(&s->lock);
+ bdrv_op_unblock(bs, BLOCK_OP_TYPE_INFO, blocker);
+ g_free(blocker);
qobject_unref(options);
if (ret < 0) {
error_prepend(errp, "Could not reopen qcow2 layer: ");
@@ -5240,6 +5254,12 @@ qcow2_get_specific_info(BlockDriverState *bs, Error
**errp)
ImageInfoSpecific *spec_info;
QCryptoBlockInfo *encrypt_info = NULL;
+ if (qemu_in_coroutine() &&
+ bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INFO, errp)) {
+ errp = NULL;
+ aio_co_reschedule_self(iohandler_get_aio_context());
+ }
+
if (s->crypto != NULL) {
encrypt_info = qcrypto_block_get_info(s->crypto, errp);
if (!encrypt_info) {
diff --git a/block/replication.c b/block/replication.c
index ca6bd0a720..459d644673 100644
--- a/block/replication.c
+++ b/block/replication.c
@@ -577,6 +577,7 @@ static void replication_start(ReplicationState *rs,
ReplicationMode mode,
}
bdrv_op_block_all(top_bs, s->blocker);
bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_DATAPLANE, s->blocker);
+ bdrv_op_unblock(top_bs, BLOCK_OP_TYPE_INFO, s->blocker);
bdrv_graph_wrunlock();
diff --git a/blockjob.c b/blockjob.c
index d5f29e14af..f0df345982 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -244,6 +244,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name,
BlockDriverState *bs,
job->nodes = g_slist_prepend(job->nodes, c);
bdrv_op_block_all(bs, job->blocker);
+ bdrv_op_unblock(bs, BLOCK_OP_TYPE_INFO, job->blocker);
return 0;
}
diff --git a/include/block/block-common.h b/include/block/block-common.h
index a846023a09..4458617179 100644
--- a/include/block/block-common.h
+++ b/include/block/block-common.h
@@ -364,6 +364,7 @@ typedef enum BlockOpType {
BLOCK_OP_TYPE_RESIZE,
BLOCK_OP_TYPE_STREAM,
BLOCK_OP_TYPE_REPLACE,
+ BLOCK_OP_TYPE_INFO,
BLOCK_OP_TYPE_MAX,
} BlockOpType;
diff --git a/migration/block.c b/migration/block.c
index 2b9054889a..3f3bc2748f 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -451,6 +451,7 @@ static int init_blk_migration(QEMUFile *f)
alloc_aio_bitmap(bmds);
error_setg(&bmds->blocker, "block device is in use by migration");
bdrv_op_block_all(bs, bmds->blocker);
+ bdrv_op_unblock(bs, BLOCK_OP_TYPE_INFO, bmds->blocker);
}
}
--
2.35.3
- [PATCH v3 00/11] block: Convert qmp_query_block into a coroutine, Fabiano Rosas, 2024/04/09
- [PATCH v3 01/11] block: Allow the wrapper script to see functions declared in qapi.h, Fabiano Rosas, 2024/04/09
- [PATCH v3 02/11] block: Temporarily mark bdrv_co_get_allocated_file_size as mixed, Fabiano Rosas, 2024/04/09
- [PATCH v3 03/11] block: Take the graph lock in bdrv_snapshot_list, Fabiano Rosas, 2024/04/09
- [PATCH v3 04/11] block: Reschedule query-block during qcow2 invalidation,
Fabiano Rosas <=
- [PATCH v3 05/11] block: Run bdrv_do_query_node_info in a coroutine, Fabiano Rosas, 2024/04/09
- [PATCH v3 06/11] block: Convert bdrv_query_block_graph_info to coroutine, Fabiano Rosas, 2024/04/09
- [PATCH v3 07/11] block: Convert bdrv_query_image_info to coroutine, Fabiano Rosas, 2024/04/09
- [PATCH v3 08/11] block: Convert bdrv_block_device_info into co_wrapper, Fabiano Rosas, 2024/04/09
- [PATCH v3 09/11] block: Don't query all block devices at hmp_nbd_server_start, Fabiano Rosas, 2024/04/09
- [PATCH v3 10/11] block: Convert qmp_query_block and qmp_query_named_block_nodes to coroutine, Fabiano Rosas, 2024/04/09
- [PATCH v3 11/11] block: Add a thread-pool version of fstat, Fabiano Rosas, 2024/04/09