[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes
From: |
Peter Lieven |
Subject: |
Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes |
Date: |
Wed, 07 May 2014 02:19:15 +0200 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.4.0 |
please ignore this one, I accidentally used an old commit message
Am 07.05.2014 02:01, schrieb Peter Lieven:
> this patch tries to optimize zero write requests
> by automatically using bdrv_write_zeroes if it is
> supported by the format.
>
> this should significantly speed up file system initialization and
> should speed up zero write tests used to test backend storage performance.
>
> the difference can simply be tested by e.g.
>
> dd if=/dev/zero of=/dev/vdX bs=1M
>
> or
>
> mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX
>
> Signed-off-by: Peter Lieven <address@hidden>
> ---
> v2->v3: - moved parameter parsing to blockdev_init
> - added per device detect_zeroes status to
> hmp (info block -v) and qmp (query-block) [Eric]
> - added support to enable detect-zeroes also
> for hot added devices [Eric]
> - added missing entry to qemu_common_drive_opts
> - fixed description of qemu_iovec_is_zero [Fam]
>
> v1->v2: - added tests to commit message (Markus)
> RFCv2->v1: - fixed parameter parsing strncmp -> strcmp (Eric)
> - fixed typo (choosen->chosen) (Eric)
> - added second example to commit msg
>
> RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
> - call zero detection only for format (bs->file != NULL)
>
> block.c | 11 ++++++++++
> block/qapi.c | 11 ++++++++++
> blockdev.c | 28 +++++++++++++++++++++++++
> hmp.c | 6 ++++++
> include/block/block_int.h | 12 +++++++++++
> include/qemu-common.h | 1 +
> qapi-schema.json | 50
> +++++++++++++++++++++++++++++++--------------
> qemu-options.hx | 6 ++++++
> qmp-commands.hx | 3 +++
> util/iov.c | 21 +++++++++++++++++++
> 10 files changed, 134 insertions(+), 15 deletions(-)
>
> diff --git a/block.c b/block.c
> index b749d31..f27b35d 100644
> --- a/block.c
> +++ b/block.c
> @@ -3244,6 +3244,17 @@ static int coroutine_fn
> bdrv_aligned_pwritev(BlockDriverState *bs,
>
> ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
>
> + if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
> + !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
> + drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
> + flags |= BDRV_REQ_ZERO_WRITE;
> + /* if the device was not opened with discard=on the below flag
> + * is immediately cleared again in bdrv_co_do_write_zeroes */
> + if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
> + flags |= BDRV_REQ_MAY_UNMAP;
> + }
> + }
> +
> if (ret < 0) {
> /* Do nothing, write notifier decided to fail this request */
> } else if (flags & BDRV_REQ_ZERO_WRITE) {
> diff --git a/block/qapi.c b/block/qapi.c
> index af11445..fbf66c2 100644
> --- a/block/qapi.c
> +++ b/block/qapi.c
> @@ -51,6 +51,17 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState
> *bs)
>
> info->backing_file_depth = bdrv_get_backing_file_depth(bs);
>
> + switch (bs->detect_zeroes) {
> + case BDRV_DETECT_ZEROES_ON:
> + info->detect_zeroes = g_strdup("on");
> + break;
> + case BDRV_DETECT_ZEROES_UNMAP:
> + info->detect_zeroes = g_strdup("unmap");
> + break;
> + default:
> + info->detect_zeroes = g_strdup("off");
> + }
> +
> if (bs->io_limits_enabled) {
> ThrottleConfig cfg;
> throttle_get_config(&bs->throttle_state, &cfg);
> diff --git a/blockdev.c b/blockdev.c
> index 7810e9f..96c11fd 100644
> --- a/blockdev.c
> +++ b/blockdev.c
> @@ -288,6 +288,21 @@ static int parse_block_error_action(const char *buf,
> bool is_read, Error **errp)
> }
> }
>
> +static BdrvDetectZeroes parse_detect_zeroes(const char *buf, Error **errp)
> +{
> + if (!buf || !strcmp(buf, "off")) {
> + return BDRV_DETECT_ZEROES_OFF;
> + } else if (!strcmp(buf, "on")) {
> + return BDRV_DETECT_ZEROES_ON;
> + } else if (!strcmp(buf, "unmap")) {
> + return BDRV_DETECT_ZEROES_UNMAP;
> + } else {
> + error_setg(errp, "invalid value for detect-zeroes: %s",
> + buf);
> + }
> + return BDRV_DETECT_ZEROES_OFF;
> +}
> +
> static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
> {
> if (throttle_conflicting(cfg)) {
> @@ -324,6 +339,7 @@ static DriveInfo *blockdev_init(const char *file, QDict
> *bs_opts,
> QemuOpts *opts;
> const char *id;
> bool has_driver_specific_opts;
> + BdrvDetectZeroes detect_zeroes;
> BlockDriver *drv = NULL;
>
> /* Check common options by copying from bs_opts to opts, all other
> options
> @@ -452,6 +468,13 @@ static DriveInfo *blockdev_init(const char *file, QDict
> *bs_opts,
> }
> }
>
> + detect_zeroes =
> + parse_detect_zeroes(qemu_opt_get(opts, "detect-zeroes"), &error);
> + if (error) {
> + error_propagate(errp, error);
> + goto early_err;
> + }
> +
> /* init */
> dinfo = g_malloc0(sizeof(*dinfo));
> dinfo->id = g_strdup(qemu_opts_id(opts));
> @@ -462,6 +485,7 @@ static DriveInfo *blockdev_init(const char *file, QDict
> *bs_opts,
> }
> dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
> dinfo->bdrv->read_only = ro;
> + dinfo->bdrv->detect_zeroes = detect_zeroes;
> dinfo->refcount = 1;
> if (serial != NULL) {
> dinfo->serial = g_strdup(serial);
> @@ -2455,6 +2479,10 @@ QemuOptsList qemu_common_drive_opts = {
> .name = "copy-on-read",
> .type = QEMU_OPT_BOOL,
> .help = "copy read data from backing file into image file",
> + },{
> + .name = "detect-zeroes",
> + .type = QEMU_OPT_STRING,
> + .help = "try to optimize zero writes",
> },
> { /* end of list */ }
> },
> diff --git a/hmp.c b/hmp.c
> index ca869ba..b1942ed 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -336,6 +336,12 @@ void hmp_info_block(Monitor *mon, const QDict *qdict)
> info->value->inserted->backing_file_depth);
> }
>
> + if (verbose) {
> + monitor_printf(mon,
> + " Detect zeroes: %s\n",
> + info->value->inserted->detect_zeroes);
> + }
> +
> if (info->value->inserted->bps
> || info->value->inserted->bps_rd
> || info->value->inserted->bps_wr
> diff --git a/include/block/block_int.h b/include/block/block_int.h
> index 9ffcb69..7b9ca05 100644
> --- a/include/block/block_int.h
> +++ b/include/block/block_int.h
> @@ -271,6 +271,17 @@ typedef struct BlockLimits {
> } BlockLimits;
>
> /*
> + * Different operation modes for automatic zero detection
> + * to speed the write operation up with bdrv_write_zeroes.
> + */
> +typedef enum {
> + BDRV_DETECT_ZEROES_OFF = 0x0,
> + BDRV_DETECT_ZEROES_ON = 0x1,
> + /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
> + BDRV_DETECT_ZEROES_UNMAP = 0x2,
> +} BdrvDetectZeroes;
> +
> +/*
> * Note: the function bdrv_append() copies and swaps contents of
> * BlockDriverStates, so if you add new fields to this struct, please
> * inspect bdrv_append() to determine if the new fields need to be
> @@ -364,6 +375,7 @@ struct BlockDriverState {
> BlockJob *job;
>
> QDict *options;
> + BdrvDetectZeroes detect_zeroes;
> };
>
> int get_tmp_filename(char *filename, int size);
> diff --git a/include/qemu-common.h b/include/qemu-common.h
> index a998e8d..8e3d6eb 100644
> --- a/include/qemu-common.h
> +++ b/include/qemu-common.h
> @@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
> void qemu_iovec_concat_iov(QEMUIOVector *dst,
> struct iovec *src_iov, unsigned int src_cnt,
> size_t soffset, size_t sbytes);
> +bool qemu_iovec_is_zero(QEMUIOVector *qiov);
> void qemu_iovec_destroy(QEMUIOVector *qiov);
> void qemu_iovec_reset(QEMUIOVector *qiov);
> size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 0b00427..5e3b4a89 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -937,6 +937,8 @@
> # @encryption_key_missing: true if the backing device is encrypted but an
> # valid encryption key is missing
> #
> +# @detect_zeroes: detect and optimize zero writes (Since 2.1)
> +#
> # @bps: total throughput limit in bytes per second is specified
> #
> # @bps_rd: read throughput limit in bytes per second is specified
> @@ -972,6 +974,7 @@
> 'data': { 'file': 'str', '*node-name': 'str', 'ro': 'bool', 'drv': 'str',
> '*backing_file': 'str', 'backing_file_depth': 'int',
> 'encrypted': 'bool', 'encryption_key_missing': 'bool',
> + 'detect_zeroes': 'str',
> 'bps': 'int', 'bps_rd': 'int', 'bps_wr': 'int',
> 'iops': 'int', 'iops_rd': 'int', 'iops_wr': 'int',
> 'image': 'ImageInfo',
> @@ -4250,6 +4253,20 @@
> 'data': [ 'ignore', 'unmap' ] }
>
> ##
> +# @BlockdevDetectZeroesOptions
> +#
> +# Selects the operation mode for zero write detection.
> +#
> +# @off: Disabled
> +# @on: Enabled
> +# @unmap: Enabled and even try to unmap blocks if possible
> +#
> +# Since: 2.1
> +##
> +{ 'enum': 'BlockdevDetectZeroesOptions',
> + 'data': [ 'off', 'on', 'unmap' ] }
> +
> +##
> # @BlockdevAioOptions
> #
> # Selects the AIO backend to handle I/O requests
> @@ -4301,20 +4318,22 @@
> # Options that are available for all block devices, independent of the block
> # driver.
> #
> -# @driver: block driver name
> -# @id: #optional id by which the new block device can be referred
> to.
> -# This is a required option on the top level of blockdev-add,
> and
> -# currently not allowed on any other level.
> -# @node-name: #optional the name of a block driver state node (Since 2.0)
> -# @discard: #optional discard-related options (default: ignore)
> -# @cache: #optional cache-related options
> -# @aio: #optional AIO backend (default: threads)
> -# @rerror: #optional how to handle read errors on the device
> -# (default: report)
> -# @werror: #optional how to handle write errors on the device
> -# (default: enospc)
> -# @read-only: #optional whether the block device should be read-only
> -# (default: false)
> +# @driver: block driver name
> +# @id: #optional id by which the new block device can be referred
> to.
> +# This is a required option on the top level of
> blockdev-add, and
> +# currently not allowed on any other level.
> +# @node-name: #optional the name of a block driver state node (Since 2.0)
> +# @discard: #optional discard-related options (default: ignore)
> +# @cache: #optional cache-related options
> +# @aio: #optional AIO backend (default: threads)
> +# @rerror: #optional how to handle read errors on the device
> +# (default: report)
> +# @werror: #optional how to handle write errors on the device
> +# (default: enospc)
> +# @read-only: #optional whether the block device should be read-only
> +# (default: false)
> +# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
> +# (default: off)
> #
> # Since: 1.7
> ##
> @@ -4327,7 +4346,8 @@
> '*aio': 'BlockdevAioOptions',
> '*rerror': 'BlockdevOnError',
> '*werror': 'BlockdevOnError',
> - '*read-only': 'bool' } }
> + '*read-only': 'bool',
> + '*detect-zeroes': 'BlockdevDetectZeroesOptions' } }
>
> ##
> # @BlockdevOptionsFile
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 781af14..5ee94ea 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -414,6 +414,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
> " [,serial=s][,addr=A][,rerror=ignore|stop|report]\n"
> "
> [,werror=ignore|stop|report|enospc][,id=name][,aio=threads|native]\n"
> " [,readonly=on|off][,copy-on-read=on|off]\n"
> + " [,detect-zeroes=on|off|unmap]\n"
> " [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
> " [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
> " [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
> @@ -475,6 +476,11 @@ Open drive @option{file} as read-only. Guest write
> attempts will fail.
> @item copy-on-read=@var{copy-on-read}
> @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
> file sectors into the image file.
> +@item detect-zeroes=@var{detect-zeroes}
> +@var{detect-zeroes} is "off", "on" or "unmap" and enables the automatic
> +conversion of plain zero writes by the OS to driver specific optimized
> +zero write commands. If "unmap" is chosen and @var{discard} is "on"
> +a zero write may even be converted to an UNMAP operation.
> @end table
>
> By default, the @option{cache=writeback} mode is used. It will report data
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index ed3ab92..a535955 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -2032,6 +2032,8 @@ Each json-object contain the following:
> - "iops_rd_max": read I/O operations max (json-int)
> - "iops_wr_max": write I/O operations max (json-int)
> - "iops_size": I/O size when limiting by iops (json-int)
> + - "detect_zeroes": detect and optimize zero writes (json-string)
> + - Possible values: "off", "on", "unmap"
> - "image": the detail of the image, it is a json-object containing
> the following:
> - "filename": image file name (json-string)
> @@ -2108,6 +2110,7 @@ Example:
> "iops_rd_max": 0,
> "iops_wr_max": 0,
> "iops_size": 0,
> + "detect_zeroes": "on",
> "image":{
> "filename":"disks/test.qcow2",
> "format":"qcow2",
> diff --git a/util/iov.c b/util/iov.c
> index 6569b5a..f8c49a1 100644
> --- a/util/iov.c
> +++ b/util/iov.c
> @@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
> qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
> }
>
> +/*
> + * check if the contents of the iovecs are all zero
> + */
> +bool qemu_iovec_is_zero(QEMUIOVector *qiov)
> +{
> + int i;
> + for (i = 0; i < qiov->niov; i++) {
> + size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
> + uint8_t *ptr = qiov->iov[i].iov_base;
> + if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
> + return false;
> + }
> + for (; offs < qiov->iov[i].iov_len; offs++) {
> + if (ptr[offs]) {
> + return false;
> + }
> + }
> + }
> + return true;
> +}
> +
> void qemu_iovec_destroy(QEMUIOVector *qiov)
> {
> assert(qiov->nalloc != -1);
- [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Peter Lieven, 2014/05/06
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes,
Peter Lieven <=
- [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Peter Lieven, 2014/05/06
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Eric Blake, 2014/05/06
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Peter Lieven, 2014/05/07
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Kevin Wolf, 2014/05/07
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Peter Lieven, 2014/05/07
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Kevin Wolf, 2014/05/07
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Peter Lieven, 2014/05/07
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Peter Lieven, 2014/05/07
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Kevin Wolf, 2014/05/08
- Re: [Qemu-devel] [PATCHv3] block: optimize zero writes with bdrv_write_zeroes, Peter Lieven, 2014/05/07