>From 98f2978ae5d0f34dca0369fcc727d1e533c0e6b0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 8 Mar 2012 14:48:29 +0100 Subject: [PATCH 1/2] qed: make write-zeroes bounce buffer smaller than a single cluster Currently, a write-zeroes operation could allocate memory for the whole I/O operation if it is not aligned. This is not necessary, because only two unaligned clusters could be written. This makes the write-zeroes operation proceed one cluster at a time, even if all clusters are currently available and zero. This does cause worse performance due to multiple L2 table writes. However, if zero-write detection is enabled it means we're not interested in maximum performance. Signed-off-by: Paolo Bonzini --- block/qed.c | 75 +++++++++++++++++++++++++++++++++++++------------------------ 1 file modificato, 46 inserzioni(+), 29 rimozioni(-) diff --git a/block/qed.c b/block/qed.c index a02dbfd..bcea346 100644 --- a/block/qed.c +++ b/block/qed.c @@ -869,7 +869,9 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret) /* Free the buffer we may have allocated for zero writes */ if (acb->flags & QED_AIOCB_ZERO) { qemu_vfree(acb->qiov->iov[0].iov_base); - acb->qiov->iov[0].iov_base = NULL; + qemu_iovec_destroy(acb->qiov); + g_free(acb->qiov); + acb->qiov = NULL; } /* Arrange for a bh to invoke the completion function */ @@ -1096,6 +1098,34 @@ static void qed_aio_write_zero_cluster(void *opaque, int ret) } /** + * Calculate the I/O vector + * + * @acb: Write request + * @len: Length in bytes + */ +static void qed_prepare_qiov(QEDAIOCB *acb, size_t len) +{ + /* Calculate the I/O vector */ + if (acb->flags & QED_AIOCB_ZERO) { + /* Allocate buffer for zero writes */ + if (!acb->qiov) { + BDRVQEDState *s = acb_to_s(acb); + char *base; + + acb->qiov = g_malloc(sizeof(QEMUIOVector)); + base = qemu_blockalign(s->bs, s->header.cluster_size); + qemu_iovec_init(acb->qiov, 1); + qemu_iovec_add(acb->qiov, base, s->header.cluster_size); + memset(base, 0, s->header.cluster_size); + 
} + assert(len <= acb->qiov->size); + qemu_iovec_add(&acb->cur_qiov, acb->qiov->iov[0].iov_base, len); + } else { + qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + } +} + +/** * Write new data cluster * * @acb: Write request @@ -1124,21 +1154,20 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) acb->cur_nclusters = qed_bytes_to_clusters(s, qed_offset_into_cluster(s, acb->cur_pos) + len); - qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); if (acb->flags & QED_AIOCB_ZERO) { /* Skip ahead if the clusters are already zero */ if (acb->find_cluster_ret == QED_CLUSTER_ZERO) { - qed_aio_next_io(acb, 0); - return; + cb = qed_aio_next_io; + } else { + cb = qed_aio_write_zero_cluster; } - - cb = qed_aio_write_zero_cluster; } else { cb = qed_aio_write_prefill; acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); } + qed_prepare_qiov(acb, len); if (qed_should_set_need_check(s)) { s->header.features |= QED_F_NEED_CHECK; qed_write_header(s, cb, acb); @@ -1158,19 +1187,8 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) */ static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len) { - /* Allocate buffer for zero writes */ - if (acb->flags & QED_AIOCB_ZERO) { - struct iovec *iov = acb->qiov->iov; - - if (!iov->iov_base) { - iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len); - memset(iov->iov_base, 0, iov->iov_len); - } - } - - /* Calculate the I/O vector */ acb->cur_cluster = offset; - qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len); + qed_prepare_qiov(acb, len); /* Do the actual write */ qed_aio_write_main(acb, 0); @@ -1270,6 +1288,7 @@ static void qed_aio_next_io(void *opaque, int ret) { QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); + uint64_t len; QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ? 
qed_aio_write_data : qed_aio_read_data; @@ -1291,10 +1310,14 @@ static void qed_aio_next_io(void *opaque, int ret) return; } + /* Limit buffer size to one cluster when writing zeroes. */ + len = acb->end_pos - acb->cur_pos; + if (acb->flags & QED_AIOCB_ZERO) { + len = MIN(len, s->header.cluster_size); + } + /* Find next cluster and start I/O */ - qed_find_cluster(s, &acb->request, - acb->cur_pos, acb->end_pos - acb->cur_pos, - io_fn, acb); + qed_find_cluster(s, &acb->request, acb->cur_pos, len, io_fn, acb); } static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, @@ -1315,7 +1338,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; acb->request.l2_table = NULL; - qemu_iovec_init(&acb->cur_qiov, qiov->niov); + qemu_iovec_init(&acb->cur_qiov, qiov ? qiov->niov : 1); /* Start request */ qed_aio_next_io(acb, 0); @@ -1364,17 +1387,11 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, { BlockDriverAIOCB *blockacb; QEDWriteZeroesCB cb = { .done = false }; - QEMUIOVector qiov; - struct iovec iov; /* Zero writes start without an I/O buffer. If a buffer becomes necessary * then it will be allocated during request processing. 
*/ - iov.iov_base = NULL, - iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE, - - qemu_iovec_init_external(&qiov, &iov, 1); - blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors, + blockacb = qed_aio_setup(bs, sector_num, NULL, nb_sectors, qed_co_write_zeroes_cb, &cb, QED_AIOCB_WRITE | QED_AIOCB_ZERO); if (!blockacb) { -- 1.7.11.2 >From c74de5ae7650233574fb1572bc3b463864af8a3e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 28 Aug 2012 16:09:23 +0200 Subject: [PATCH 2/2] qed: do copy-on-write for the first and last cluster in a misaligned write-zero request Signed-off-by: Paolo Bonzini --- block/qed.c | 11 +++++++++++ 1 file modificato, 11 inserzioni(+) diff --git a/block/qed.c b/block/qed.c index bcea346..0f8d06c 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1160,9 +1160,20 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len) if (acb->find_cluster_ret == QED_CLUSTER_ZERO) { cb = qed_aio_next_io; } else { + /* For misaligned requests, do normal copy-on-write for the first + * and last cluster. + */ + unsigned offset = qed_offset_into_cluster(s, acb->cur_pos); + if (offset != 0 || qed_offset_into_cluster(s, offset + len) != 0) { + acb->cur_nclusters = 1; + len = MIN(len, s->header.cluster_size - offset); + goto copy; + } + cb = qed_aio_write_zero_cluster; } } else { +copy: cb = qed_aio_write_prefill; acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters); } -- 1.7.11.2