Re: [Qemu-block] [PATCH v2 3/3] xen-disk: use an IOThread per instance

qemu-block

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-block] [PATCH v2 3/3] xen-disk: use an IOThread per instance

From:	Stefano Stabellini
Subject:	Re: [Qemu-block] [PATCH v2 3/3] xen-disk: use an IOThread per instance
Date:	Thu, 22 Jun 2017 15:14:44 -0700 (PDT)
User-agent:	Alpine 2.10 (DEB 1266 2009-07-14)

CC'ing Andreas Färber. Could you please give a quick look below at the
way the iothread object is instantiated and destroyed? I am no object
model expert and would appreciate a second opinion.


On Wed, 21 Jun 2017, Paul Durrant wrote:
> This patch allocates an IOThread object for each xen_disk instance and
> sets the AIO context appropriately on connect. This allows processing
> of I/O to proceed in parallel.
> 
> The patch also adds tracepoints into xen_disk to make it possible to
> follow the state transtions of an instance in the log.
> 
> Signed-off-by: Paul Durrant <address@hidden>
> ---
> Cc: Stefano Stabellini <address@hidden>
> Cc: Anthony Perard <address@hidden>
> Cc: Kevin Wolf <address@hidden>
> Cc: Max Reitz <address@hidden>
> 
> v2:
>  - explicitly acquire and release AIO context in qemu_aio_complete() and
>    blk_bh()
> ---
>  hw/block/trace-events |  7 ++++++
>  hw/block/xen_disk.c   | 69 
> ++++++++++++++++++++++++++++++++++++++++++++-------
>  2 files changed, 67 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/block/trace-events b/hw/block/trace-events
> index 65e83dc258..608b24ba66 100644
> --- a/hw/block/trace-events
> +++ b/hw/block/trace-events
> @@ -10,3 +10,10 @@ virtio_blk_submit_multireq(void *mrb, int start, int 
> num_reqs, uint64_t offset,
>  # hw/block/hd-geometry.c
>  hd_geometry_lchs_guess(void *blk, int cyls, int heads, int secs) "blk %p 
> LCHS %d %d %d"
>  hd_geometry_guess(void *blk, uint32_t cyls, uint32_t heads, uint32_t secs, 
> int trans) "blk %p CHS %u %u %u trans %d"
> +
> +# hw/block/xen_disk.c
> +xen_disk_alloc(char *name) "%s"
> +xen_disk_init(char *name) "%s"
> +xen_disk_connect(char *name) "%s"
> +xen_disk_disconnect(char *name) "%s"
> +xen_disk_free(char *name) "%s"
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 0e6513708e..8548195195 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -27,10 +27,13 @@
>  #include "hw/xen/xen_backend.h"
>  #include "xen_blkif.h"
>  #include "sysemu/blockdev.h"
> +#include "sysemu/iothread.h"
>  #include "sysemu/block-backend.h"
>  #include "qapi/error.h"
>  #include "qapi/qmp/qdict.h"
>  #include "qapi/qmp/qstring.h"
> +#include "qom/object_interfaces.h"
> +#include "trace.h"
>  
>  /* ------------------------------------------------------------- */
>  
> @@ -128,6 +131,9 @@ struct XenBlkDev {
>      DriveInfo           *dinfo;
>      BlockBackend        *blk;
>      QEMUBH              *bh;
> +
> +    IOThread            *iothread;
> +    AioContext          *ctx;
>  };
>  
>  /* ------------------------------------------------------------- */
> @@ -599,9 +605,12 @@ static int ioreq_runio_qemu_aio(struct ioreq *ioreq);
>  static void qemu_aio_complete(void *opaque, int ret)
>  {
>      struct ioreq *ioreq = opaque;
> +    struct XenBlkDev *blkdev = ioreq->blkdev;
> +
> +    aio_context_acquire(blkdev->ctx);

I think that Paolo was right that we need an aio_context_acquire here;
however, the issue is that the current code has this call chain:

  blk_handle_requests -> ioreq_runio_qemu_aio -> qemu_aio_complete

which leads to aio_context_acquire being called twice on the same lock,
which I don't think is allowed?

I think we need to get rid of the qemu_aio_complete call from
ioreq_runio_qemu_aio, but to do that we need to be careful with the
accounting of aio_inflight (today it's incremented unconditionally at
the beginning of ioreq_runio_qemu_aio; I think we would have to change
that to increment it only if presync).


>      if (ret != 0) {
> -        xen_pv_printf(&ioreq->blkdev->xendev, 0, "%s I/O error\n",
> +        xen_pv_printf(&blkdev->xendev, 0, "%s I/O error\n",
>                        ioreq->req.operation == BLKIF_OP_READ ? "read" : 
> "write");
>          ioreq->aio_errors++;
>      }
> @@ -610,13 +619,13 @@ static void qemu_aio_complete(void *opaque, int ret)
>      if (ioreq->presync) {
>          ioreq->presync = 0;
>          ioreq_runio_qemu_aio(ioreq);
> -        return;
> +        goto done;
>      }
>      if (ioreq->aio_inflight > 0) {
> -        return;
> +        goto done;
>      }
>  
> -    if (ioreq->blkdev->feature_grant_copy) {
> +    if (blkdev->feature_grant_copy) {
>          switch (ioreq->req.operation) {
>          case BLKIF_OP_READ:
>              /* in case of failure ioreq->aio_errors is increased */
> @@ -638,7 +647,7 @@ static void qemu_aio_complete(void *opaque, int ret)
>      }
>  
>      ioreq->status = ioreq->aio_errors ? BLKIF_RSP_ERROR : BLKIF_RSP_OKAY;
> -    if (!ioreq->blkdev->feature_grant_copy) {
> +    if (!blkdev->feature_grant_copy) {
>          ioreq_unmap(ioreq);
>      }
>      ioreq_finish(ioreq);
> @@ -650,16 +659,19 @@ static void qemu_aio_complete(void *opaque, int ret)
>          }
>      case BLKIF_OP_READ:
>          if (ioreq->status == BLKIF_RSP_OKAY) {
> -            block_acct_done(blk_get_stats(ioreq->blkdev->blk), &ioreq->acct);
> +            block_acct_done(blk_get_stats(blkdev->blk), &ioreq->acct);
>          } else {
> -            block_acct_failed(blk_get_stats(ioreq->blkdev->blk), 
> &ioreq->acct);
> +            block_acct_failed(blk_get_stats(blkdev->blk), &ioreq->acct);
>          }
>          break;
>      case BLKIF_OP_DISCARD:
>      default:
>          break;
>      }
> -    qemu_bh_schedule(ioreq->blkdev->bh);
> +    qemu_bh_schedule(blkdev->bh);
> +
> +done:
> +    aio_context_release(blkdev->ctx);
>  }
>  
>  static bool blk_split_discard(struct ioreq *ioreq, blkif_sector_t 
> sector_number,
> @@ -917,17 +929,40 @@ static void blk_handle_requests(struct XenBlkDev 
> *blkdev)
>  static void blk_bh(void *opaque)
>  {
>      struct XenBlkDev *blkdev = opaque;
> +
> +    aio_context_acquire(blkdev->ctx);
>      blk_handle_requests(blkdev);
> +    aio_context_release(blkdev->ctx);
>  }
>  
>  static void blk_alloc(struct XenDevice *xendev)
>  {
>      struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, 
> xendev);
> +    Object *obj;
> +    char *name;
> +    Error *err = NULL;
> +
> +    trace_xen_disk_alloc(xendev->name);
>  
>      QLIST_INIT(&blkdev->inflight);
>      QLIST_INIT(&blkdev->finished);
>      QLIST_INIT(&blkdev->freelist);
> -    blkdev->bh = qemu_bh_new(blk_bh, blkdev);
> +
> +    obj = object_new(TYPE_IOTHREAD);
> +    name = g_strdup_printf("iothread-%s", xendev->name);
> +
> +    object_property_add_child(object_get_objects_root(), name, obj, &err);
> +    assert(!err);

Would it be enough to call object_ref?


> +    g_free(name);
> +
> +    user_creatable_complete(obj, &err);

Why do we need to call this?


> +    assert(!err);
> +
> +    blkdev->iothread = (IOThread *)object_dynamic_cast(obj, TYPE_IOTHREAD);
> +    blkdev->ctx = iothread_get_aio_context(blkdev->iothread);
> +    blkdev->bh = aio_bh_new(blkdev->ctx, blk_bh, blkdev);
> +
>      if (xen_mode != XEN_EMULATE) {
>          batch_maps = 1;
>      }
> @@ -1288,6 +1327,8 @@ static int blk_connect(struct XenDevice *xendev)
>          blkdev->persistent_gnt_count = 0;
>      }
>  
> +    blk_set_aio_context(blkdev->blk, blkdev->ctx);
> +
>      xen_be_bind_evtchn(&blkdev->xendev);
>  
>      xen_pv_printf(&blkdev->xendev, 1, "ok: proto %s, nr-ring-ref %u, "
> @@ -1301,13 +1342,20 @@ static void blk_disconnect(struct XenDevice *xendev)
>  {
>      struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, 
> xendev);
>  
> +    trace_xen_disk_disconnect(xendev->name);
> +
> +    aio_context_acquire(blkdev->ctx);
> +
>      if (blkdev->blk) {
> +        blk_set_aio_context(blkdev->blk, qemu_get_aio_context());
>          blk_detach_dev(blkdev->blk, blkdev);
>          blk_unref(blkdev->blk);
>          blkdev->blk = NULL;
>      }
>      xen_pv_unbind_evtchn(&blkdev->xendev);
>  
> +    aio_context_release(blkdev->ctx);
> +
>      if (blkdev->sring) {
>          xengnttab_unmap(blkdev->xendev.gnttabdev, blkdev->sring,
>                          blkdev->nr_ring_ref);
> @@ -1358,6 +1408,7 @@ static int blk_free(struct XenDevice *xendev)
>      g_free(blkdev->dev);
>      g_free(blkdev->devtype);
>      qemu_bh_delete(blkdev->bh);
> +    object_unparent(OBJECT(blkdev->iothread));

Shouldn't this be object_unref?


>      return 0;
>  }

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-block] [PATCH v2 0/3] xen-disk: performance improvements, Paul Durrant, 2017/06/21
- [Qemu-block] [PATCH v2 1/3] xen-disk: only advertize feature-persistent if grant copy is not available, Paul Durrant, 2017/06/21
  - Re: [Qemu-block] [PATCH v2 1/3] xen-disk: only advertize feature-persistent if grant copy is not available, Stefano Stabellini, 2017/06/21
- [Qemu-block] [PATCH v2 2/3] xen-disk: add support for multi-page shared rings, Paul Durrant, 2017/06/21
  - Re: [Qemu-block] [PATCH v2 2/3] xen-disk: add support for multi-page shared rings, Stefano Stabellini, 2017/06/21
- [Qemu-block] [PATCH v2 3/3] xen-disk: use an IOThread per instance, Paul Durrant, 2017/06/21
  - Re: [Qemu-block] [PATCH v2 3/3] xen-disk: use an IOThread per instance, Stefano Stabellini <=
- Re: [Qemu-block] [Xen-devel] [PATCH v2 0/3] xen-disk: performance improvements, Stefano Stabellini, 2017/06/27
  - Re: [Qemu-block] [Xen-devel] [PATCH v2 0/3] xen-disk: performance improvements, Paul Durrant, 2017/06/28

Prev by Date: Re: [Qemu-block] [PATCH v2 00/31] qed: Convert to coroutines
Next by Date: Re: [Qemu-block] [Qemu-devel] [PATCH v3] live-block-ops.txt: Rename, rewrite, and improve it
Previous by thread: [Qemu-block] [PATCH v2 3/3] xen-disk: use an IOThread per instance
Next by thread: Re: [Qemu-block] [Xen-devel] [PATCH v2 0/3] xen-disk: performance improvements
Index(es):
- Date
- Thread