qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] Block: don't do copy-on-read in before_write_no


From: Kevin Wolf
Subject: Re: [Qemu-devel] [PATCH] Block: don't do copy-on-read in before_write_notifier
Date: Thu, 3 Sep 2015 17:44:36 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

Am 02.09.2015 um 16:18 hat Stefan Hajnoczi geschrieben:
> On Wed, Aug 19, 2015 at 10:54:44AM +0800, Wen Congyang wrote:
> > We will copy data in before_write_notifier to do backup.
> > It is a nested I/O request, so we cannot do copy-on-read.
> > 
> > Signed-off-by: Wen Congyang <address@hidden>
> > ---
> >  block/backup.c        | 19 +++++++++++++------
> >  block/io.c            | 11 ++++++++++-
> >  include/block/block.h |  3 +++
> >  trace-events          |  1 +
> >  4 files changed, 27 insertions(+), 7 deletions(-)
> > 
> > diff --git a/block/backup.c b/block/backup.c
> > index 965654d..b729c4b 100644
> > --- a/block/backup.c
> > +++ b/block/backup.c
> > @@ -89,7 +89,8 @@ static void cow_request_end(CowRequest *req)
> >  
> >  static int coroutine_fn backup_do_cow(BlockDriverState *bs,
> >                                        int64_t sector_num, int nb_sectors,
> > -                                      bool *error_is_read)
> > +                                      bool *error_is_read,
> > +                                      bool is_write_notifier)
> >  {
> >      BackupBlockJob *job = (BackupBlockJob *)bs->job;
> >      CowRequest cow_request;
> > @@ -129,8 +130,13 @@ static int coroutine_fn backup_do_cow(BlockDriverState 
> > *bs,
> >          iov.iov_len = n * BDRV_SECTOR_SIZE;
> >          qemu_iovec_init_external(&bounce_qiov, &iov, 1);
> >  
> > -        ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
> > -                            &bounce_qiov);
> > +        if (is_write_notifier) {
> > +            ret = bdrv_co_nested_readv(bs, start * 
> > BACKUP_SECTORS_PER_CLUSTER,
> > +                                       n, &bounce_qiov);
> > +        } else {
> > +            ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
> > +                                &bounce_qiov);
> > +        }
> >          if (ret < 0) {
> >              trace_backup_do_cow_read_fail(job, start, ret);
> >              if (error_is_read) {
> > @@ -190,7 +196,7 @@ static int coroutine_fn backup_before_write_notify(
> >      assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
> >      assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
> >  
> > -    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
> > +    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
> >  }
> >  
> >  static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
> > @@ -303,7 +309,8 @@ static int coroutine_fn 
> > backup_run_incremental(BackupBlockJob *job)
> >                      return ret;
> >                  }
> >                  ret = backup_do_cow(bs, cluster * 
> > BACKUP_SECTORS_PER_CLUSTER,
> > -                                    BACKUP_SECTORS_PER_CLUSTER, 
> > &error_is_read);
> > +                                    BACKUP_SECTORS_PER_CLUSTER, 
> > &error_is_read,
> > +                                    false);
> >                  if ((ret < 0) &&
> >                      backup_error_action(job, error_is_read, -ret) ==
> >                      BLOCK_ERROR_ACTION_REPORT) {
> > @@ -408,7 +415,7 @@ static void coroutine_fn backup_run(void *opaque)
> >              }
> >              /* FULL sync mode we copy the whole drive. */
> >              ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
> > -                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
> > +                    BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
> >              if (ret < 0) {
> >                  /* Depending on error action, fail now or retry cluster */
> >                  BlockErrorAction action =
> > diff --git a/block/io.c b/block/io.c
> > index d4bc83b..04325f9 100644
> > --- a/block/io.c
> > +++ b/block/io.c
> > @@ -932,7 +932,8 @@ static int coroutine_fn 
> > bdrv_co_do_preadv(BlockDriverState *bs,
> >          return ret;
> >      }
> >  
> > -    if (bs->copy_on_read) {
> > +    /* Don't do copy-on-read if we read data before write operation */
> > +    if (bs->copy_on_read && !(flags & BDRV_REQ_NESTED)) {
> >          flags |= BDRV_REQ_COPY_ON_READ;
> >      }
> >  
> > @@ -1001,6 +1002,14 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, 
> > int64_t sector_num,
> >      return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
> >  }
> >  
> > +int coroutine_fn bdrv_co_nested_readv(BlockDriverState *bs,
> > +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
> > +{
> > +    trace_bdrv_co_nested_readv(bs, sector_num, nb_sectors);
> > +
> > +    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 
> > BDRV_REQ_NESTED);
> > +}
> > +
> >  int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
> >      int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
> >  {
> > diff --git a/include/block/block.h b/include/block/block.h
> > index 608cd4e..f5578b2 100644
> > --- a/include/block/block.h
> > +++ b/include/block/block.h
> > @@ -60,6 +60,7 @@ typedef enum {
> >       * opened with BDRV_O_UNMAP.
> >       */
> >      BDRV_REQ_MAY_UNMAP    = 0x4,
> > +    BDRV_REQ_NESTED       = 0x8,
> >  } BdrvRequestFlags;
> >  
> >  typedef struct BlockSizes {
> > @@ -253,6 +254,8 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, 
> > int64_t sector_num,
> >      int nb_sectors, QEMUIOVector *qiov);
> >  int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
> >      int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> > +int coroutine_fn bdrv_co_nested_readv(BlockDriverState *bs,
> > +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
> >  int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
> >      int nb_sectors, QEMUIOVector *qiov);
> >  /*
> > diff --git a/trace-events b/trace-events
> > index 8f9614a..e29e1cf 100644
> > --- a/trace-events
> > +++ b/trace-events
> > @@ -69,6 +69,7 @@ bdrv_aio_write_zeroes(void *bs, int64_t sector_num, int 
> > nb_sectors, int flags, v
> >  bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
> >  bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p 
> > sector_num %"PRId64" nb_sectors %d"
> >  bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p 
> > sector_num %"PRId64" nb_sectors %d"
> > +bdrv_co_nested_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p 
> > sector_num %"PRId64" nb_sectors %d"
> >  bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p 
> > sector_num %"PRId64" nb_sectors %d"
> >  bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector, int 
> > flags) "bs %p sector_num %"PRId64" nb_sectors %d flags %#x"
> >  bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, 
> > void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p"
> 
> This solution looks good to me.
> 
> I think the BDRV_REQ_NESTED and bdrv_co_nested_readv() name is too
> vague, especially since there is no documentation about what "nested"
> means here.  I'm afraid the flag will be (ab)used for other stuff in the
> future and we'll end up with confusing/broken semantics.
> 
> Please call it BDRV_REQ_NO_COPY_ON_READ so it's clear what this flag
> does.

It also makes clear that it's an ugly hack. :-)

The scenario that is fixed here is one of the reasons why I disliked
these notifiers from the beginning. I still hope that we can move to
filter BDSes at some point and get rid of the hacks that were required
in the generic block layer to make things work without filters.

Kevin



reply via email to

[Prev in Thread] Current Thread [Next in Thread]