[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCHv2] block: optimize zero writes with bdrv_write_zeroe
From: |
Peter Lieven |
Subject: |
[Qemu-devel] [PATCHv2] block: optimize zero writes with bdrv_write_zeroes |
Date: |
Fri, 28 Mar 2014 15:46:22 +0100 |
this patch tries to optimize zero write requests
by automatically using bdrv_write_zeroes if it is
supported by the format.
This significantly speeds up file system initialization and
should speed zero write test used to test backend storage
performance.
I ran the following 2 tests on my internal SSD with a
50G QCOW2 container and on an attached iSCSI storage.
a) mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX
QCOW2 [off] [on] [unmap]
-----
runtime: 14secs 1.1secs 1.1secs
filesize: 937M 18M 18M
iSCSI [off] [on] [unmap]
----
runtime: 9.3s 0.9s 0.9s
b) dd if=/dev/zero of=/dev/vdX bs=1M oflag=direct
QCOW2 [off] [on] [unmap]
-----
runtime: 246secs 18secs 18secs
filesize: 51G 192K 192K
throughput: 203M/s 2.3G/s 2.3G/s
iSCSI* [off] [on] [unmap]
----
runtime: 8mins 45secs 33secs
throughput: 106M/s 1.2G/s 1.6G/s
allocated: 100% 100% 0%
* The storage was connected via an 1Gbit interface.
It seems to internally handle writing zeroes
via WRITESAME16 very fast.
Signed-off-by: Peter Lieven <address@hidden>
---
v1->v2: - added tests to commit message (Markus)
RFCv2->v1: - fixed paramter parsing strncmp -> strcmp (Eric)
- fixed typo (choosen->chosen) (Eric)
- added second example to commit msg
RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
- call zero detection only for format (bs->file != NULL)
block.c | 39 ++++++++++++++++++++++++++++++++++++++-
include/block/block_int.h | 12 ++++++++++++
include/qemu-common.h | 1 +
qemu-options.hx | 6 ++++++
util/iov.c | 21 +++++++++++++++++++++
5 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/block.c b/block.c
index acb70fd..3f61f76 100644
--- a/block.c
+++ b/block.c
@@ -817,6 +817,25 @@ static int bdrv_assign_node_name(BlockDriverState *bs,
return 0;
}
+static int bdrv_set_detect_zeroes(BlockDriverState *bs,
+ const char *detect_zeroes,
+ Error **errp)
+{
+ if (!detect_zeroes || !strcmp(detect_zeroes, "off")) {
+ bs->detect_zeroes = BDRV_DETECT_ZEROES_OFF;
+ } else if (!strcmp(detect_zeroes, "on")) {
+ bs->detect_zeroes = BDRV_DETECT_ZEROES_ON;
+ } else if (!strcmp(detect_zeroes, "unmap")) {
+ bs->detect_zeroes = BDRV_DETECT_ZEROES_UNMAP;
+ } else {
+ error_setg(errp, "invalid value for detect-zeroes: %s",
+ detect_zeroes);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/*
* Common part for opening disk images and files
*
@@ -827,7 +846,7 @@ static int bdrv_open_common(BlockDriverState *bs,
BlockDriverState *file,
{
int ret, open_flags;
const char *filename;
- const char *node_name = NULL;
+ const char *node_name = NULL, *detect_zeroes = NULL;
Error *local_err = NULL;
assert(drv != NULL);
@@ -855,6 +874,13 @@ static int bdrv_open_common(BlockDriverState *bs,
BlockDriverState *file,
}
qdict_del(options, "node-name");
+ detect_zeroes = qdict_get_try_str(options, "detect-zeroes");
+ ret = bdrv_set_detect_zeroes(bs, detect_zeroes, errp);
+ if (ret < 0) {
+ return ret;
+ }
+ qdict_del(options, "detect-zeroes");
+
/* bdrv_open() with directly using a protocol as drv. This layer is already
* opened, so assign it to bs (while file becomes a closed
BlockDriverState)
* and return immediately. */
@@ -3179,6 +3205,17 @@ static int coroutine_fn
bdrv_aligned_pwritev(BlockDriverState *bs,
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
+ if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
+ !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
+ drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
+ flags |= BDRV_REQ_ZERO_WRITE;
+ /* if the device was not opened with discard=on the below flag
+ * is immediately cleared again in bdrv_co_do_write_zeroes */
+ if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
+ flags |= BDRV_REQ_MAY_UNMAP;
+ }
+ }
+
if (ret < 0) {
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index cd5bc73..7a3013a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -271,6 +271,17 @@ typedef struct BlockLimits {
} BlockLimits;
/*
+ * Different operation modes for automatic zero detection
+ * to speed the write operation up with bdrv_write_zeroes.
+ */
+typedef enum {
+ BDRV_DETECT_ZEROES_OFF = 0x0,
+ BDRV_DETECT_ZEROES_ON = 0x1,
+ /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
+ BDRV_DETECT_ZEROES_UNMAP = 0x2,
+} BdrvDetectZeroes;
+
+/*
* Note: the function bdrv_append() copies and swaps contents of
* BlockDriverStates, so if you add new fields to this struct, please
* inspect bdrv_append() to determine if the new fields need to be
@@ -365,6 +376,7 @@ struct BlockDriverState {
BlockJob *job;
QDict *options;
+ BdrvDetectZeroes detect_zeroes;
};
int get_tmp_filename(char *filename, int size);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index c8a58a8..574da73 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
void qemu_iovec_concat_iov(QEMUIOVector *dst,
struct iovec *src_iov, unsigned int src_cnt,
size_t soffset, size_t sbytes);
+bool qemu_iovec_is_zero(QEMUIOVector *qiov);
void qemu_iovec_destroy(QEMUIOVector *qiov);
void qemu_iovec_reset(QEMUIOVector *qiov);
size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
diff --git a/qemu-options.hx b/qemu-options.hx
index ee5437b..f824d9b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -410,6 +410,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
"
[,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
" [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
" [,readonly=on|off][,copy-on-read=on|off]\n"
+ " [,detect-zeroes=on|off|unmap]\n"
" [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
" [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
" [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
@@ -470,6 +471,11 @@ Open drive @option{file} as read-only. Guest write
attempts will fail.
@item address@hidden
@var{copy-on-read} is "on" or "off" and enables whether to copy read backing
file sectors into the image file.
address@hidden address@hidden
address@hidden is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. If "unmap" is chosen and @var{discard} is "on"
+a zero write may even be converted to an UNMAP operation.
@end table
By default, the @option{cache=writeback} mode is used. It will report data
diff --git a/util/iov.c b/util/iov.c
index 6569b5a..0b17392 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
}
+/*
+ * check if the contents of the iovecs is all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+ int i;
+ for (i = 0; i < qiov->niov; i++) {
+ size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
+ uint8_t *ptr = qiov->iov[i].iov_base;
+ if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+ return false;
+ }
+ for (; offs < qiov->iov[i].iov_len; offs++) {
+ if (ptr[offs]) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
void qemu_iovec_destroy(QEMUIOVector *qiov)
{
assert(qiov->nalloc != -1);
--
1.7.9.5
- [Qemu-devel] [PATCHv2] block: optimize zero writes with bdrv_write_zeroes,
Peter Lieven <=