[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] vl.c/exit: pause cpus before closing block devi
From: |
Paolo Bonzini |
Subject: |
Re: [Qemu-devel] [PATCH] vl.c/exit: pause cpus before closing block devices |
Date: |
Tue, 8 Aug 2017 13:04:24 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.2.1 |
On 08/08/2017 12:02, Kevin Wolf wrote:
> Am 04.08.2017 um 13:46 hat Paolo Bonzini geschrieben:
>> On 04/08/2017 11:58, Stefan Hajnoczi wrote:
>>>> the root cause of this bug is related to this as well:
>>>> https://lists.gnu.org/archive/html/qemu-devel/2017-07/msg02945.html
>>>>
>>>> From commit 99723548 we started assuming (incorrectly?) that blk_
>>>> functions always WILL have an attached BDS, but this is not always true,
>>>> for instance, flushing the cache from an empty CDROM.
>>>>
>>>> Paolo, can we move the flight counter increment outside of the
>>>> block-backend layer, is that safe?
>>> I think the bdrv_inc_in_flight(blk_bs(blk)) needs to be fixed
>>> regardless of the throttling timer issue discussed below. BB cannot
>>> assume that the BDS graph is non-empty.
>>
>> Can we make bdrv_aio_* return NULL (even temporarily) if there is no
>> attached BDS? That would make it much easier to fix.
>
> Would the proper fix be much more complicated than the following? I must
> admit that I don't fully understand the current state of affairs with
> respect to threading, AioContext etc. so I may well be missing
> something.
Not much, but it's not complete either. The issues I see are that: 1)
blk_drain_all does not take the new counter into account; 2)
bdrv_drain_all callers need to be audited to see if they should be
blk_drain_all (or more likely, only device BlockBackends should be drained).
Paolo
> Note that my blk_drain() implementation doesn't necessarily drain
> blk_bs(blk) completely, but only those requests that came from the
> specific BlockBackend. I think this is what the callers want, but
> if otherwise, it shouldn't be hard to change.
Yes, this should be what they want.
Paolo
> Kevin
>
> diff --git a/block.c b/block.c
> index f1c3e98a4d..07decc3b5f 100644
> --- a/block.c
> +++ b/block.c
> @@ -4563,7 +4563,7 @@ out:
>
> AioContext *bdrv_get_aio_context(BlockDriverState *bs)
> {
> - return bs->aio_context;
> + return bs ? bs->aio_context : qemu_get_aio_context();
> }
>
> void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 968438c149..649d11b4ef 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -68,6 +68,9 @@ struct BlockBackend {
> NotifierList remove_bs_notifiers, insert_bs_notifiers;
>
> int quiesce_counter;
> +
> + /* Number of in-flight requests. Accessed with atomic ops. */
> + unsigned int in_flight;
> };
>
> typedef struct BlockBackendAIOCB {
> @@ -1109,6 +1112,16 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags
> flags)
> return bdrv_make_zero(blk->root, flags);
> }
>
> +static void blk_inc_in_flight(BlockBackend *blk)
> +{
> + atomic_inc(&blk->in_flight);
> +}
> +
> +static void blk_dec_in_flight(BlockBackend *blk)
> +{
> + atomic_dec(&blk->in_flight);
> +}
> +
> static void error_callback_bh(void *opaque)
> {
> struct BlockBackendAIOCB *acb = opaque;
> @@ -1147,7 +1160,7 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
> static void blk_aio_complete(BlkAioEmAIOCB *acb)
> {
> if (acb->has_returned) {
> - bdrv_dec_in_flight(acb->common.bs);
> + blk_dec_in_flight(acb->rwco.blk);
> acb->common.cb(acb->common.opaque, acb->rwco.ret);
> qemu_aio_unref(acb);
> }
> @@ -1168,7 +1181,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk,
> int64_t offset, int bytes,
> BlkAioEmAIOCB *acb;
> Coroutine *co;
>
> - bdrv_inc_in_flight(blk_bs(blk));
> + blk_inc_in_flight(blk);
> acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
> acb->rwco = (BlkRwCo) {
> .blk = blk,
> @@ -1405,8 +1418,16 @@ int blk_flush(BlockBackend *blk)
>
> void blk_drain(BlockBackend *blk)
> {
> - if (blk_bs(blk)) {
> - bdrv_drain(blk_bs(blk));
> + AioContext *ctx = blk_get_aio_context(blk);
> +
> + while (atomic_read(&blk->in_flight)) {
> + aio_context_acquire(ctx);
> + aio_poll(ctx, false);
> + aio_context_release(ctx);
> +
> + if (blk_bs(blk)) {
> + bdrv_drain(blk_bs(blk));
> + }
> }
> }
>
> @@ -1453,10 +1474,11 @@ static void send_qmp_error_event(BlockBackend *blk,
> bool is_read, int error)
> {
> IoOperationType optype;
> + BlockDriverState *bs = blk_bs(blk);
>
> optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
> qapi_event_send_block_io_error(blk_name(blk),
> - bdrv_get_node_name(blk_bs(blk)), optype,
> + bs ? bdrv_get_node_name(bs) : "", optype,
> action, blk_iostatus_is_enabled(blk),
> error == ENOSPC, strerror(error),
> &error_abort);
> diff --git a/tests/ide-test.c b/tests/ide-test.c
> index bfd79ddbdc..ffbfb04271 100644
> --- a/tests/ide-test.c
> +++ b/tests/ide-test.c
> @@ -689,6 +689,24 @@ static void test_flush_nodev(void)
> ide_test_quit();
> }
>
> +static void test_flush_empty_drive(void)
> +{
> + QPCIDevice *dev;
> + QPCIBar bmdma_bar, ide_bar;
> +
> + ide_test_start("-device ide-cd,bus=ide.0");
> + dev = get_pci_device(&bmdma_bar, &ide_bar);
> +
> + /* FLUSH CACHE command on device 0*/
> + qpci_io_writeb(dev, ide_bar, reg_device, 0);
> + qpci_io_writeb(dev, ide_bar, reg_command, CMD_FLUSH_CACHE);
> +
> + /* Just testing that qemu doesn't crash... */
> +
> + free_pci_device(dev);
> + ide_test_quit();
> +}
> +
> static void test_pci_retry_flush(void)
> {
> test_retry_flush("pc");
> @@ -954,6 +972,7 @@ int main(int argc, char **argv)
>
> qtest_add_func("/ide/flush", test_flush);
> qtest_add_func("/ide/flush/nodev", test_flush_nodev);
> + qtest_add_func("/ide/flush/empty_drive", test_flush_empty_drive);
> qtest_add_func("/ide/flush/retry_pci", test_pci_retry_flush);
> qtest_add_func("/ide/flush/retry_isa", test_isa_retry_flush);
>
> diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c
> new file mode 100644
> index 0000000000..5348781a42
> --- /dev/null
> +++ b/tests/test-block-backend.c
> @@ -0,0 +1,62 @@
> +/*
> + * BlockBackend tests
> + *
> + * Copyright (c) 2017 Kevin Wolf <address@hidden>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> copy
> + * of this software and associated documentation files (the "Software"), to
> deal
> + * in the Software without restriction, including without limitation the
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "block/block.h"
> +#include "sysemu/block-backend.h"
> +#include "qapi/error.h"
> +
> +static void test_drain_aio_error_flush_cb(void *opaque, int ret)
> +{
> + bool *completed = opaque;
> +
> + g_assert(ret == -ENOMEDIUM);
> + *completed = true;
> +}
> +
> +static void test_drain_aio_error(void)
> +{
> + BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
> + BlockAIOCB *acb;
> + bool completed = false;
> +
> + acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed);
> + g_assert(acb != NULL);
> + g_assert(completed == false);
> +
> + blk_drain(blk);
> + g_assert(completed == true);
> +}
> +
> +int main(int argc, char **argv)
> +{
> + bdrv_init();
> + qemu_init_main_loop(&error_abort);
> +
> + g_test_init(&argc, &argv, NULL);
> +
> + g_test_add_func("/block-backend/drain_aio_error", test_drain_aio_error);
> +
> + return g_test_run();
> +}
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index 59e536bf0b..7beb65397b 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -56,6 +56,7 @@ check-unit-y += tests/test-hbitmap$(EXESUF)
> gcov-files-test-hbitmap-y = blockjob.c
> check-unit-y += tests/test-blockjob$(EXESUF)
> check-unit-y += tests/test-blockjob-txn$(EXESUF)
> +check-unit-y += tests/test-block-backend$(EXESUF)
> check-unit-y += tests/test-x86-cpuid$(EXESUF)
> # all code tested by test-x86-cpuid is inside topology.h
> gcov-files-test-x86-cpuid-y =
> @@ -556,6 +557,7 @@ tests/test-aio-multithread$(EXESUF):
> tests/test-aio-multithread.o $(test-block-o
> tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
> tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y)
> $(test-util-obj-y)
> tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o
> $(test-block-obj-y) $(test-util-obj-y)
> +tests/test-block-backend$(EXESUF): tests/test-block-backend.o
> $(test-block-obj-y) $(test-util-obj-y)
> tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
> tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y)
> tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y)
> $(test-crypto-obj-y)
>