qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_z


From: Peter Lieven
Subject: Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
Date: Wed, 07 May 2014 02:19:15 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.4.0

please ignore this one, I accidently used an old commit message

Am 07.05.2014 02:01, schrieb Peter Lieven:
> this patch tries to optimize zero write requests
> by automatically using bdrv_write_zeroes if it is
> supported by the format.
>
> this should significantly speed up file system initialization and
> should speed zero write test used to test backend storage performance.
>
> the difference can simply be tested by e.g.
>
>    dd if=/dev/zero of=/dev/vdX bs=1M
>
> or
>
>    mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX
>
> Signed-off-by: Peter Lieven <address@hidden>
> ---
> v2->v3: - moved parameter parsing to blockdev_init
>         - added per device detect_zeroes status to 
>           hmp (info block -v) and qmp (query-block) [Eric]
>         - added support to enable detect-zeroes also
>           for hot added devices [Eric]
>         - added missing entry to qemu_common_drive_opts
>         - fixed description of qemu_iovec_is_zero [Fam]
>
> v1->v2: - added tests to commit message (Markus)
> RFCv2->v1: - fixed paramter parsing strncmp -> strcmp (Eric)
>            - fixed typo (choosen->chosen) (Eric)
>            - added second example to commit msg
>
> RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
>               - call zero detection only for format (bs->file != NULL)
>
>  block.c                   |   11 ++++++++++
>  block/qapi.c              |   11 ++++++++++
>  blockdev.c                |   28 +++++++++++++++++++++++++
>  hmp.c                     |    6 ++++++
>  include/block/block_int.h |   12 +++++++++++
>  include/qemu-common.h     |    1 +
>  qapi-schema.json          |   50 
> +++++++++++++++++++++++++++++++--------------
>  qemu-options.hx           |    6 ++++++
>  qmp-commands.hx           |    3 +++
>  util/iov.c                |   21 +++++++++++++++++++
>  10 files changed, 134 insertions(+), 15 deletions(-)
>
> diff --git a/block.c b/block.c
> index b749d31..f27b35d 100644
> --- a/block.c
> +++ b/block.c
> @@ -3244,6 +3244,17 @@ static int coroutine_fn 
> bdrv_aligned_pwritev(BlockDriverState *bs,
>  
>      ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
>  
> +    if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
> +        !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
> +        drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
> +        flags |= BDRV_REQ_ZERO_WRITE;
> +        /* if the device was not opened with discard=on the below flag
> +         * is immediately cleared again in bdrv_co_do_write_zeroes */
> +        if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
> +            flags |= BDRV_REQ_MAY_UNMAP;
> +        }
> +    }
> +
>      if (ret < 0) {
>          /* Do nothing, write notifier decided to fail this request */
>      } else if (flags & BDRV_REQ_ZERO_WRITE) {
> diff --git a/block/qapi.c b/block/qapi.c
> index af11445..fbf66c2 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -51,6 +51,17 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState 
> *bs)
>  
>      info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>  
> +    switch (bs->detect_zeroes) {
> +    case BDRV_DETECT_ZEROES_ON:
> +        info->detect_zeroes = g_strdup("on");
> +        break;
> +    case BDRV_DETECT_ZEROES_UNMAP:
> +        info->detect_zeroes = g_strdup("unmap");
> +        break;
> +    default:
> +        info->detect_zeroes = g_strdup("off");
> +    }
> +
>      if (bs->io_limits_enabled) {
>          ThrottleConfig cfg;
>          throttle_get_config(&bs->throttle_state, &cfg);
> diff --git a/blockdev.c b/blockdev.c
> index 7810e9f..96c11fd 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -288,6 +288,21 @@ static int parse_block_error_action(const char *buf, 
> bool is_read, Error **errp)
>      }
>  }
>  
> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
> +{
> +    if (!buf || !strcmp(buf, "off")) {
> +        return BDRV_DETECT_ZEROES_OFF;
> +    } else if (!strcmp(buf, "on")) {
> +        return BDRV_DETECT_ZEROES_ON;
> +    } else if (!strcmp(buf, "unmap")) {
> +        return BDRV_DETECT_ZEROES_UNMAP;
> +    } else {
> +        error_setg(errp, "invalid value for detect-zeroes: %s",
> +                   buf);
> +    }
> +    return BDRV_DETECT_ZEROES_OFF;
> +}
> +
>  static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
>  {
>      if (throttle_conflicting(cfg)) {
> @@ -324,6 +339,7 @@ static DriveInfo *blockdev_init(const char *file, QDict 
> *bs_opts,
>      QemuOpts *opts;
>      const char *id;
>      bool has_driver_specific_opts;
> +    BdrvDetectZeroes detect_zeroes;
>      BlockDriver *drv = NULL;
>  
>      /* Check common options by copying from bs_opts to opts, all other 
> options
> @@ -452,6 +468,13 @@ static DriveInfo *blockdev_init(const char *file, QDict 
> *bs_opts,
>          }
>      }
>  
> +    detect_zeroes =
> +        parse_detect_zeroes(qemu_opt_get(opts, "detect-zeroes"), &error);
> +    if (error) {
> +        error_propagate(errp, error);
> +        goto early_err;
> +    }
> +
>      /* init */
>      dinfo = g_malloc0(sizeof(*dinfo));
>      dinfo->id = g_strdup(qemu_opts_id(opts));
> @@ -462,6 +485,7 @@ static DriveInfo *blockdev_init(const char *file, QDict 
> *bs_opts,
>      }
>      dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
>      dinfo->bdrv->read_only = ro;
> +    dinfo->bdrv->detect_zeroes = detect_zeroes;
>      dinfo->refcount = 1;
>      if (serial != NULL) {
>          dinfo->serial = g_strdup(serial);
> @@ -2455,6 +2479,10 @@ QemuOptsList qemu_common_drive_opts = {
>              .name = "copy-on-read",
>              .type = QEMU_OPT_BOOL,
>              .help = "copy read data from backing file into image file",
> +        },{
> +            .name = "detect-zeroes",
> +            .type = QEMU_OPT_STRING,
> +            .help = "try to optimize zero writes",
>          },
>          { /* end of list */ }
>      },
> diff --git a/hmp.c b/hmp.c
> index ca869ba..b1942ed 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -336,6 +336,12 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
>                             info->value->inserted->backing_file_depth);
>          }
>  
> +        if (verbose) {
> +            monitor_printf(mon,
> +                           "    Detect zeroes:    %s\n",
> +                           info->value->inserted->detect_zeroes);
> +        }
> +
>          if (info->value->inserted->bps
>              || info->value->inserted->bps_rd
>              || info->value->inserted->bps_wr
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 9ffcb69..7b9ca05 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -271,6 +271,17 @@ typedef struct BlockLimits {
>  } BlockLimits;
>  
>  /*
> + * Different operation modes for automatic zero detection
> + * to speed the write operation up with bdrv_write_zeroes.
> + */
> +typedef enum {
> +    BDRV_DETECT_ZEROES_OFF   = 0x0,
> +    BDRV_DETECT_ZEROES_ON    = 0x1,
> +    /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
> +    BDRV_DETECT_ZEROES_UNMAP = 0x2,
> +} BdrvDetectZeroes;
> +
> +/*
>   * Note: the function bdrv_append() copies and swaps contents of
>   * BlockDriverStates, so if you add new fields to this struct, please
>   * inspect bdrv_append() to determine if the new fields need to be
> @@ -364,6 +375,7 @@ struct BlockDriverState {
>      BlockJob *job;
>  
>      QDict *options;
> +    BdrvDetectZeroes detect_zeroes;
>  };
>  
>  int get_tmp_filename(char *filename, int size);
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index a998e8d..8e3d6eb 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
>  void qemu_iovec_concat_iov(QEMUIOVector *dst,
>                             struct iovec *src_iov, unsigned int src_cnt,
>                             size_t soffset, size_t sbytes);
> +bool qemu_iovec_is_zero(QEMUIOVector *qiov);
>  void qemu_iovec_destroy(QEMUIOVector *qiov);
>  void qemu_iovec_reset(QEMUIOVector *qiov);
>  size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 0b00427..5e3b4a89 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -937,6 +937,8 @@
>  # @encryption_key_missing: true if the backing device is encrypted but an
>  #                          valid encryption key is missing
>  #
> +# @detect_zeroes: detect and optimize zero writes (Since 2.1)
> +#
>  # @bps: total throughput limit in bytes per second is specified
>  #
>  # @bps_rd: read throughput limit in bytes per second is specified
> @@ -972,6 +974,7 @@
>    'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
>              '*backing_file': 'str', 'backing_file_depth': 'int',
>              'encrypted': 'bool', 'encryption_key_missing': 'bool',
> +            'detect_zeroes': 'str',
>              'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
>              'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
>              'image': 'ImageInfo',
> @@ -4250,6 +4253,20 @@
>    'data': [ 'ignore', 'unmap' ] }
>  
>  ##
> +# @BlockdevDetectZeroesOptions
> +#
> +# Selects the operation mode for zero write detection.
> +#
> +# @off:      Disabled
> +# @on:       Enabled
> +# @unmap:    Enabled and even try to unmap blocks if possible
> +#
> +# Since: 2.1
> +##
> +{ 'enum': 'BlockdevDetectZeroesOptions',
> +  'data': [ 'off', 'on', 'unmap' ] }
> +
> +##
>  # @BlockdevAioOptions
>  #
>  # Selects the AIO backend to handle I/O requests
> @@ -4301,20 +4318,22 @@
>  # Options that are available for all block devices, independent of the block
>  # driver.
>  #
> -# @driver:      block driver name
> -# @id:          #optional id by which the new block device can be referred 
> to.
> -#               This is a required option on the top level of blockdev-add, 
> and
> -#               currently not allowed on any other level.
> -# @node-name:   #optional the name of a block driver state node (Since 2.0)
> -# @discard:     #optional discard-related options (default: ignore)
> -# @cache:       #optional cache-related options
> -# @aio:         #optional AIO backend (default: threads)
> -# @rerror:      #optional how to handle read errors on the device
> -#               (default: report)
> -# @werror:      #optional how to handle write errors on the device
> -#               (default: enospc)
> -# @read-only:   #optional whether the block device should be read-only
> -#               (default: false)
> +# @driver:        block driver name
> +# @id:            #optional id by which the new block device can be referred 
> to.
> +#                 This is a required option on the top level of 
> blockdev-add, and
> +#                 currently not allowed on any other level.
> +# @node-name:     #optional the name of a block driver state node (Since 2.0)
> +# @discard:       #optional discard-related options (default: ignore)
> +# @cache:         #optional cache-related options
> +# @aio:           #optional AIO backend (default: threads)
> +# @rerror:        #optional how to handle read errors on the device
> +#                 (default: report)
> +# @werror:        #optional how to handle write errors on the device
> +#                 (default: enospc)
> +# @read-only:     #optional whether the block device should be read-only
> +#                 (default: false)
> +# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
> +#                 (default: off)
>  #
>  # Since: 1.7
>  ##
> @@ -4327,7 +4346,8 @@
>              '*aio': 'BlockdevAioOptions',
>              '*rerror': 'BlockdevOnError',
>              '*werror': 'BlockdevOnError',
> -            '*read-only': 'bool' } }
> +            '*read-only': 'bool',
> +            '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
>  
>  ##
>  # @BlockdevOptionsFile
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 781af14..5ee94ea 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
>      "       [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
>      "       
> [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
>      "       [,readonly=on|off][,copy-on-read=on|off]\n"
> +    "       [,detect-zeroes=on|off|unmap]\n"
>      "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
>      "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
>      "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
> @@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write 
> attempts will fail.
>  @item address@hidden
>  @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
>  file sectors into the image file.
> address@hidden address@hidden
> address@hidden is "off", "on" or "unmap" and enables the automatic
> +conversion of plain zero writes by the OS to driver specific optimized
> +zero write commands. If "unmap" is chosen and @var{discard} is "on"
> +a zero write may even be converted to an UNMAP operation.
>  @end table
>  
>  By default, the @option{cache=writeback} mode is used. It will report data
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index ed3ab92..a535955 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -2032,6 +2032,8 @@ Each json-object contain the following:
>           - "iops_rd_max":  read I/O operations max (json-int)
>           - "iops_wr_max":  write I/O operations max (json-int)
>           - "iops_size": I/O size when limiting by iops (json-int)
> +         - "detect_zeroes": detect and optimize zero writes (json-string)
> +             - Possible values: "off", "on", "unmap"
>           - "image": the detail of the image, it is a json-object containing
>              the following:
>               - "filename": image file name (json-string)
> @@ -2108,6 +2110,7 @@ Example:
>                 "iops_rd_max": 0,
>                 "iops_wr_max": 0,
>                 "iops_size": 0,
> +               "detect_zeroes": "on",
>                 "image":{
>                    "filename":"disks/test.qcow2",
>                    "format":"qcow2",
> diff --git a/util/iov.c b/util/iov.c
> index 6569b5a..f8c49a1 100644
> --- a/util/iov.c
> +++ b/util/iov.c
> @@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
>      qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
>  }
>  
> +/*
> + *  check if the contents of the iovecs are all zero
> + */
> +bool qemu_iovec_is_zero(QEMUIOVector *qiov)
> +{
> +    int i;
> +    for (i = 0; i < qiov->niov; i++) {
> +        size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
> +        uint8_t *ptr = qiov->iov[i].iov_base;
> +        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
> +            return false;
> +        }
> +        for (; offs < qiov->iov[i].iov_len; offs++) {
> +            if (ptr[offs]) {
> +                return false;
> +            }
> +        }
> +    }
> +    return true;
> +}
> +
>  void qemu_iovec_destroy(QEMUIOVector *qiov)
>  {
>      assert(qiov->nalloc != -1);




reply via email to

[Prev in Thread] Current Thread [Next in Thread]