qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] vl.c/exit: pause cpus before closing block devi


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH] vl.c/exit: pause cpus before closing block devices
Date: Tue, 8 Aug 2017 13:04:24 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Thunderbird/52.2.1

On 08/08/2017 12:02, Kevin Wolf wrote:
> Am 04.08.2017 um 13:46 hat Paolo Bonzini geschrieben:
>> On 04/08/2017 11:58, Stefan Hajnoczi wrote:
>>>> the root cause of this bug is related to this as well:
>>>> https://lists.gnu.org/archive/html/qemu-devel/2017-07/msg02945.html
>>>>
>>>> From commit 99723548 we started assuming (incorrectly?) that blk_
>>>> functions always WILL have an attached BDS, but this is not always true,
>>>> for instance, flushing the cache from an empty CDROM.
>>>>
>>>> Paolo, can we move the flight counter increment outside of the
>>>> block-backend layer, is that safe?
>>> I think the bdrv_inc_in_flight(blk_bs(blk)) needs to be fixed
>>> regardless of the throttling timer issue discussed below.  BB cannot
>>> assume that the BDS graph is non-empty.
>>
>> Can we make bdrv_aio_* return NULL (even temporarily) if there is no
>> attached BDS?  That would make it much easier to fix.
> 
> Would the proper fix be much more complicated than the following? I must
> admit that I don't fully understand the current state of affairs with
> respect to threading, AioContext etc. so I may well be missing
> something.

Not much, but it's not complete either.  The issues I see are that: 1)
blk_drain_all does not take the new counter into account; 2)
bdrv_drain_all callers need to be audited to see if they should be
blk_drain_all (or more likely, only device BlockBackends should be drained).

Paolo

> Note that my blk_drain() implementation doesn't necessarily drain
> blk_bs(blk) completely, but only those requests that came from the
> specific BlockBackend. I think this is what the callers want, but
> if otherwise, it shouldn't be hard to change.

Yes, this should be what they want.

Paolo

> Kevin
> 
> diff --git a/block.c b/block.c
> index f1c3e98a4d..07decc3b5f 100644
> --- a/block.c
> +++ b/block.c
> @@ -4563,7 +4563,7 @@ out:
>  
>  AioContext *bdrv_get_aio_context(BlockDriverState *bs)
>  {
> -    return bs->aio_context;
> +    return bs ? bs->aio_context : qemu_get_aio_context();
>  }
>  
>  void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co)
> diff --git a/block/block-backend.c b/block/block-backend.c
> index 968438c149..649d11b4ef 100644
> --- a/block/block-backend.c
> +++ b/block/block-backend.c
> @@ -68,6 +68,9 @@ struct BlockBackend {
>      NotifierList remove_bs_notifiers, insert_bs_notifiers;
>  
>      int quiesce_counter;
> +
> +    /* Number of in-flight requests. Accessed with atomic ops. */
> +    unsigned int in_flight;
>  };
>  
>  typedef struct BlockBackendAIOCB {
> @@ -1109,6 +1112,16 @@ int blk_make_zero(BlockBackend *blk, BdrvRequestFlags 
> flags)
>      return bdrv_make_zero(blk->root, flags);
>  }
>  
> +static void blk_inc_in_flight(BlockBackend *blk)
> +{
> +    atomic_inc(&blk->in_flight);
> +}
> +
> +static void blk_dec_in_flight(BlockBackend *blk)
> +{
> +    atomic_dec(&blk->in_flight);
> +}
> +
>  static void error_callback_bh(void *opaque)
>  {
>      struct BlockBackendAIOCB *acb = opaque;
> @@ -1147,7 +1160,7 @@ static const AIOCBInfo blk_aio_em_aiocb_info = {
>  static void blk_aio_complete(BlkAioEmAIOCB *acb)
>  {
>      if (acb->has_returned) {
> -        bdrv_dec_in_flight(acb->common.bs);
> +        blk_dec_in_flight(acb->rwco.blk);
>          acb->common.cb(acb->common.opaque, acb->rwco.ret);
>          qemu_aio_unref(acb);
>      }
> @@ -1168,7 +1181,7 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, 
> int64_t offset, int bytes,
>      BlkAioEmAIOCB *acb;
>      Coroutine *co;
>  
> -    bdrv_inc_in_flight(blk_bs(blk));
> +    blk_inc_in_flight(blk);
>      acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
>      acb->rwco = (BlkRwCo) {
>          .blk    = blk,
> @@ -1405,8 +1418,16 @@ int blk_flush(BlockBackend *blk)
>  
>  void blk_drain(BlockBackend *blk)
>  {
> -    if (blk_bs(blk)) {
> -        bdrv_drain(blk_bs(blk));
> +    AioContext *ctx = blk_get_aio_context(blk);
> +
> +    while (atomic_read(&blk->in_flight)) {
> +        aio_context_acquire(ctx);
> +        aio_poll(ctx, false);
> +        aio_context_release(ctx);
> +
> +        if (blk_bs(blk)) {
> +            bdrv_drain(blk_bs(blk));
> +        }
>      }
>  }
>  
> @@ -1453,10 +1474,11 @@ static void send_qmp_error_event(BlockBackend *blk,
>                                   bool is_read, int error)
>  {
>      IoOperationType optype;
> +    BlockDriverState *bs = blk_bs(blk);
>  
>      optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
>      qapi_event_send_block_io_error(blk_name(blk),
> -                                   bdrv_get_node_name(blk_bs(blk)), optype,
> +                                   bs ? bdrv_get_node_name(bs) : "", optype,
>                                     action, blk_iostatus_is_enabled(blk),
>                                     error == ENOSPC, strerror(error),
>                                     &error_abort);
> diff --git a/tests/ide-test.c b/tests/ide-test.c
> index bfd79ddbdc..ffbfb04271 100644
> --- a/tests/ide-test.c
> +++ b/tests/ide-test.c
> @@ -689,6 +689,24 @@ static void test_flush_nodev(void)
>      ide_test_quit();
>  }
>  
> +static void test_flush_empty_drive(void)
> +{
> +    QPCIDevice *dev;
> +    QPCIBar bmdma_bar, ide_bar;
> +
> +    ide_test_start("-device ide-cd,bus=ide.0");
> +    dev = get_pci_device(&bmdma_bar, &ide_bar);
> +
> +    /* FLUSH CACHE command on device 0*/
> +    qpci_io_writeb(dev, ide_bar, reg_device, 0);
> +    qpci_io_writeb(dev, ide_bar, reg_command, CMD_FLUSH_CACHE);
> +
> +    /* Just testing that qemu doesn't crash... */
> +
> +    free_pci_device(dev);
> +    ide_test_quit();
> +}
> +
>  static void test_pci_retry_flush(void)
>  {
>      test_retry_flush("pc");
> @@ -954,6 +972,7 @@ int main(int argc, char **argv)
>  
>      qtest_add_func("/ide/flush", test_flush);
>      qtest_add_func("/ide/flush/nodev", test_flush_nodev);
> +    qtest_add_func("/ide/flush/empty_drive", test_flush_empty_drive);
>      qtest_add_func("/ide/flush/retry_pci", test_pci_retry_flush);
>      qtest_add_func("/ide/flush/retry_isa", test_isa_retry_flush);
>  
> diff --git a/tests/test-block-backend.c b/tests/test-block-backend.c
> new file mode 100644
> index 0000000000..5348781a42
> --- /dev/null
> +++ b/tests/test-block-backend.c
> @@ -0,0 +1,62 @@
> +/*
> + * BlockBackend tests
> + *
> + * Copyright (c) 2017 Kevin Wolf <address@hidden>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to 
> deal
> + * in the Software without restriction, including without limitation the 
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "block/block.h"
> +#include "sysemu/block-backend.h"
> +#include "qapi/error.h"
> +
> +static void test_drain_aio_error_flush_cb(void *opaque, int ret)
> +{
> +    bool *completed = opaque;
> +
> +    g_assert(ret == -ENOMEDIUM);
> +    *completed = true;
> +}
> +
> +static void test_drain_aio_error(void)
> +{
> +    BlockBackend *blk = blk_new(BLK_PERM_ALL, BLK_PERM_ALL);
> +    BlockAIOCB *acb;
> +    bool completed = false;
> +
> +    acb = blk_aio_flush(blk, test_drain_aio_error_flush_cb, &completed);
> +    g_assert(acb != NULL);
> +    g_assert(completed == false);
> +
> +    blk_drain(blk);
> +    g_assert(completed == true);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +    bdrv_init();
> +    qemu_init_main_loop(&error_abort);
> +
> +    g_test_init(&argc, &argv, NULL);
> +
> +    g_test_add_func("/block-backend/drain_aio_error", test_drain_aio_error);
> +
> +    return g_test_run();
> +}
> diff --git a/tests/Makefile.include b/tests/Makefile.include
> index 59e536bf0b..7beb65397b 100644
> --- a/tests/Makefile.include
> +++ b/tests/Makefile.include
> @@ -56,6 +56,7 @@ check-unit-y += tests/test-hbitmap$(EXESUF)
>  gcov-files-test-hbitmap-y = blockjob.c
>  check-unit-y += tests/test-blockjob$(EXESUF)
>  check-unit-y += tests/test-blockjob-txn$(EXESUF)
> +check-unit-y += tests/test-block-backend$(EXESUF)
>  check-unit-y += tests/test-x86-cpuid$(EXESUF)
>  # all code tested by test-x86-cpuid is inside topology.h
>  gcov-files-test-x86-cpuid-y =
> @@ -556,6 +557,7 @@ tests/test-aio-multithread$(EXESUF): 
> tests/test-aio-multithread.o $(test-block-o
>  tests/test-throttle$(EXESUF): tests/test-throttle.o $(test-block-obj-y)
>  tests/test-blockjob$(EXESUF): tests/test-blockjob.o $(test-block-obj-y) 
> $(test-util-obj-y)
>  tests/test-blockjob-txn$(EXESUF): tests/test-blockjob-txn.o 
> $(test-block-obj-y) $(test-util-obj-y)
> +tests/test-block-backend$(EXESUF): tests/test-block-backend.o 
> $(test-block-obj-y) $(test-util-obj-y)
>  tests/test-thread-pool$(EXESUF): tests/test-thread-pool.o $(test-block-obj-y)
>  tests/test-iov$(EXESUF): tests/test-iov.o $(test-util-obj-y)
>  tests/test-hbitmap$(EXESUF): tests/test-hbitmap.o $(test-util-obj-y) 
> $(test-crypto-obj-y)
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]