[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCHv4] Add blkmirror block driver
From: |
Federico Simoncelli |
Subject: |
[Qemu-devel] [PATCHv4] Add blkmirror block driver |
Date: |
Wed, 29 Feb 2012 17:01:14 +0000 |
Mirrored writes are used by live block copy.
Signed-off-by: Marcelo Tosatti <address@hidden>
Signed-off-by: Federico Simoncelli <address@hidden>
Signed-off-by: Paolo Bonzini <address@hidden>
---
Makefile.objs | 2 +-
block/blkmirror.c | 278 ++++++++++++++++++++++++++++++++++++++++++++++++++++
cutils.c | 56 +++++++++++
docs/blkmirror.txt | 15 +++
qemu-common.h | 2 +
5 files changed, 352 insertions(+), 1 deletions(-)
create mode 100644 block/blkmirror.c
create mode 100644 docs/blkmirror.txt
diff --git a/Makefile.objs b/Makefile.objs
index 808de6a..2302c96 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o
dmg.o bochs.o vpc.o vv
block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o
qcow2-cache.o
block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
blkmirror.o
block-nested-y += stream.o
block-nested-$(CONFIG_WIN32) += raw-win32.o
block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 0000000..d894ca8
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,278 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include "block_int.h"
+
+typedef struct { /* per-device state of one blkmirror instance */
+ BlockDriverState *bs[2]; /* [0]: source image (reads come from it); [1]: mirror destination */
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+typedef struct SingleAIOCB { /* one of the two child writes of a mirrored request */
+ BlockDriverAIOCB *aiocb; /* handle returned by bdrv_aio_writev() for this child */
+ int finished; /* set to 1 by blkmirror_aio_cb() once this child has completed */
+ DupAIOCB *parent; /* the mirrored request this child belongs to */
+} SingleAIOCB;
+
+struct DupAIOCB { /* a write request duplicated onto both images */
+ BlockDriverAIOCB common;
+ int count; /* child writes still outstanding; dup_aio_get() starts it at 2 */
+
+ BlockDriverCompletionFunc *cb; /* NOTE(review): never accessed in this file; completion goes through common.cb */
+ SingleAIOCB aios[2];
+ int ret; /* 0, or the last negative error code reported by a child */
+};
+
+/*
+ * Open a blkmirror device on top of two images.
+ *
+ * Valid blkmirror filenames look like
+ *     blkmirror:fmt1:path/to/image1:fmt2:path/to/image2
+ * The "blkmirror:" prefix is optional here.  A ':' that belongs to a field
+ * must be escaped with a backslash; text left over after the fourth field
+ * is rejected instead of being silently dropped.
+ *
+ * The first image is opened with the caller's flags.  The second one is
+ * opened without its own backing file and, if the first image has a backing
+ * file, gets a fresh BlockDriverState opened on that same backing file.
+ *
+ * Returns 0 on success, or a negative errno value (-ENOMEM, -EINVAL, or
+ * the error returned by bdrv_open()) after deleting every BlockDriverState
+ * that was created here.
+ */
+static int blkmirror_open(BlockDriverState *bs, const char *filename,
+                          int flags)
+{
+    int ret = 0, i;
+    char *tmpbuf, *tok[4], *next;
+    const char *rest;
+    BlockDriver *drv1, *drv2;
+    BdrvMirrorState *m = bs->opaque;
+    BlockDriverState *bk;
+
+    m->bs[0] = bdrv_new("");
+    if (m->bs[0] == NULL) {
+        return -ENOMEM;
+    }
+
+    m->bs[1] = bdrv_new("");
+    if (m->bs[1] == NULL) {
+        bdrv_delete(m->bs[0]);
+        m->bs[0] = NULL;    /* do not leave a dangling pointer behind */
+        return -ENOMEM;
+    }
+
+    /* estrtok_r() splits and unescapes in place, so work on a private copy */
+    tmpbuf = g_strdup(filename);
+
+    /* Parse the blkmirror: prefix */
+    next = tmpbuf;
+    if (strstart(tmpbuf, "blkmirror:", &rest)) {
+        next = tmpbuf + (rest - tmpbuf);
+    }
+
+    /* Fields: tok[0]=fmt1, tok[1]=image1, tok[2]=fmt2, tok[3]=image2 */
+    for (i = 0; i < 4; i++) {
+        if (!next) {
+            ret = -EINVAL;
+            goto out;
+        }
+        tok[i] = estrtok_r(NULL, ":", '\\', &next);
+    }
+
+    if (next) {
+        /* An unescaped ':' remained: opening a truncated destination path
+         * would mirror writes into the wrong file, so refuse instead. */
+        ret = -EINVAL;
+        goto out;
+    }
+
+    drv1 = bdrv_find_whitelisted_format(tok[0]);
+    drv2 = bdrv_find_whitelisted_format(tok[2]);
+    if (!drv1 || !drv2) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = bdrv_open(m->bs[0], tok[1], flags, drv1);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* If we crash, we cannot assume that the destination is a
+     * valid mirror and we have to start over.  So speed up things
+     * by effectively operating on the destination in cache=unsafe
+     * mode.
+     */
+    ret = bdrv_open(m->bs[1], tok[3], flags | BDRV_O_NO_BACKING
+                    | BDRV_O_NO_FLUSH | BDRV_O_CACHE_WB, drv2);
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (m->bs[0]->backing_hd) {
+        bk = m->bs[0]->backing_hd;
+
+        m->bs[1]->backing_hd = bdrv_new("");
+        if (!m->bs[1]->backing_hd) {
+            ret = -ENOMEM;
+            goto out;
+        }
+
+        /* opening the same backing file of the source */
+        ret = bdrv_open(m->bs[1]->backing_hd,
+                        bk->filename, bk->open_flags, bk->drv);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+
+out:
+    g_free(tmpbuf);
+
+    if (ret < 0) {
+        for (i = 0; i < 2; i++) {
+            bdrv_delete(m->bs[i]);
+            m->bs[i] = NULL;
+        }
+    }
+
+    return ret;
+}
+
+/* Delete both mirrored images and clear the pointers kept in the state. */
+static void blkmirror_close(BlockDriverState *bs)
+{
+    BdrvMirrorState *s = bs->opaque;
+    BlockDriverState **child = s->bs;
+    BlockDriverState **end = s->bs + 2;
+
+    while (child != end) {
+        bdrv_delete(*child);
+        *child = NULL;
+        child++;
+    }
+}
+
+/*
+ * Flush the first image, then the second.  A failure on the first image
+ * is returned immediately and the second image is not flushed.
+ */
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+    BdrvMirrorState *s = bs->opaque;
+    int err = bdrv_co_flush(s->bs[0]);
+
+    if (err >= 0) {
+        err = bdrv_co_flush(s->bs[1]);
+    }
+    return err;
+}
+
+/* The length of the mirror is whatever bdrv_getlength() reports for the
+ * first image; the second image is not consulted. */
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+    BdrvMirrorState *s = bs->opaque;
+    BlockDriverState *source = s->bs[0];
+
+    return bdrv_getlength(source);
+}
+
+/* Reads are not mirrored: the request is forwarded unchanged to the first
+ * image, together with the caller's completion callback. */
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             QEMUIOVector *qiov,
+                                             int nb_sectors,
+                                             BlockDriverCompletionFunc *cb,
+                                             void *opaque)
+{
+    BdrvMirrorState *s = bs->opaque;
+    BlockDriverState *source = s->bs[0];
+
+    return bdrv_aio_readv(source, sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+/*
+ * Cancel a mirrored write: cancel every child that has not completed yet,
+ * then release the parent AIOCB.
+ *
+ * NOTE(review): if bdrv_aio_cancel() can run the child's completion
+ * callback, blkmirror_aio_cb() may already have released the parent by the
+ * time we get to qemu_aio_release() below - confirm against the block layer.
+ */
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    DupAIOCB *dup = container_of(blockacb, DupAIOCB, common);
+    SingleAIOCB *child;
+
+    for (child = dup->aios; child != dup->aios + 2; child++) {
+        if (child->finished) {
+            continue;
+        }
+        bdrv_aio_cancel(child->aiocb);
+    }
+    qemu_aio_release(dup);
+}
+
+/* AIOCB pool used by dup_aio_get() to allocate DupAIOCB requests. */
+static AIOPool dup_aio_pool = {
+    .cancel     = dup_aio_cancel,
+    .aiocb_size = sizeof(DupAIOCB),
+};
+
+/*
+ * Completion callback of one child write.  @opaque is the child's
+ * SingleAIOCB.  A negative @ret is recorded in the parent; once both
+ * children have completed, the parent's callback is invoked with the
+ * recorded result and the parent is released.
+ */
+static void blkmirror_aio_cb(void *opaque, int ret)
+{
+    SingleAIOCB *child = opaque;
+    DupAIOCB *dup = child->parent;
+
+    child->finished = 1;
+    dup->count -= 1;
+    assert(dup->count >= 0);
+    if (ret < 0) {
+        dup->ret = ret;
+    }
+
+    if (dup->count != 0) {
+        /* the sibling write is still in flight */
+        return;
+    }
+
+    dup->common.cb(dup->common.opaque, dup->ret);
+    qemu_aio_release(dup);
+}
+
+/*
+ * Allocate a DupAIOCB from dup_aio_pool and prepare it for two child
+ * writes: both children unfinished and pointing back at the parent,
+ * two completions outstanding, result 0.  Returns NULL if qemu_aio_get()
+ * returns NULL.
+ */
+static DupAIOCB *dup_aio_get(BlockDriverState *bs,
+                             BlockDriverCompletionFunc *cb,
+                             void *opaque)
+{
+    DupAIOCB *dup = qemu_aio_get(&dup_aio_pool, bs, cb, opaque);
+    SingleAIOCB *child;
+
+    if (dup == NULL) {
+        return NULL;
+    }
+
+    dup->ret = 0;
+    dup->count = 2;
+    for (child = dup->aios; child != dup->aios + 2; child++) {
+        child->finished = 0;
+        child->parent = dup;
+    }
+
+    return dup;
+}
+
+/*
+ * Submit the same write to both images.
+ *
+ * Each child write completes through blkmirror_aio_cb(), which calls @cb
+ * with @opaque once both children are done.  Returns the parent AIOCB, or
+ * NULL if no DupAIOCB could be obtained (in which case nothing was
+ * submitted and @cb is not called).
+ *
+ * NOTE(review): a NULL return from a child bdrv_aio_writev() is stored but
+ * not handled here - confirm whether it can happen.
+ */
+static BlockDriverAIOCB *blkmirror_aio_writev(BlockDriverState *bs,
+                                              int64_t sector_num,
+                                              QEMUIOVector *qiov,
+                                              int nb_sectors,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque)
+{
+    BdrvMirrorState *m = bs->opaque;
+    DupAIOCB *dcb = dup_aio_get(bs, cb, opaque);
+    int i;
+
+    /* dup_aio_get() reports allocation failure as NULL */
+    if (!dcb) {
+        return NULL;
+    }
+
+    for (i = 0; i < 2; i++) {
+        dcb->aios[i].aiocb = bdrv_aio_writev(m->bs[i], sector_num, qiov,
+                                             nb_sectors, &blkmirror_aio_cb,
+                                             &dcb->aios[i]);
+    }
+
+    return &dcb->common;
+}
+
+/*
+ * Discard the sector range on the first image, then on the second.
+ * A failure on the first image is returned immediately and the second
+ * image is left untouched.
+ */
+static coroutine_fn int blkmirror_co_discard(BlockDriverState *bs,
+                                             int64_t sector_num, int nb_sectors)
+{
+    BdrvMirrorState *s = bs->opaque;
+    int err = bdrv_co_discard(s->bs[0], sector_num, nb_sectors);
+
+    if (err >= 0) {
+        err = bdrv_co_discard(s->bs[1], sector_num, nb_sectors);
+    }
+    return err;
+}
+
+
+/* Driver description handed to bdrv_register(). */
+static BlockDriver bdrv_blkmirror = {
+    /* identification */
+    .format_name            = "blkmirror",
+    .protocol_name          = "blkmirror",
+    .instance_size          = sizeof(BdrvMirrorState),
+
+    /* lifecycle */
+    .bdrv_file_open         = blkmirror_open,
+    .bdrv_close             = blkmirror_close,
+    .bdrv_getlength         = blkmirror_getlength,
+
+    /* I/O: reads use the first image, everything else goes to both */
+    .bdrv_aio_readv         = blkmirror_aio_readv,
+    .bdrv_aio_writev        = blkmirror_aio_writev,
+    .bdrv_co_flush_to_disk  = blkmirror_co_flush,
+    .bdrv_co_discard        = blkmirror_co_discard,
+};
+
+static void bdrv_blkmirror_init(void) /* registers the blkmirror driver */
+{
+ bdrv_register(&bdrv_blkmirror);
+}
+
+block_init(bdrv_blkmirror_init); /* NOTE(review): presumably runs the function at block-module init - confirm */
diff --git a/cutils.c b/cutils.c
index af308cd..ae8ddfb 100644
--- a/cutils.c
+++ b/cutils.c
@@ -54,6 +54,62 @@ char *pstrcat(char *buf, int buf_size, const char *s)
return buf;
}
+/*
+ * strtok_r with escaping support.
+ *
+ * Returns the next token of @str, or, when @str is NULL, of the string
+ * that *@p points to (as left by the previous call).  The token ends at
+ * the first unescaped character contained in @delim.  The token is
+ * unescaped in place: each unescaped @esc is dropped and the character
+ * following it is copied literally, even if it is a delimiter.
+ *
+ * Unlike strtok_r(), adjacent delimiters produce empty tokens.
+ *
+ * On return *@p points just past the delimiter that ended the token, or is
+ * NULL if the end of the string was reached.  Returns NULL, leaving *@p
+ * NULL, when called with @str == NULL after the input has been exhausted.
+ */
+char *estrtok_r(char *str, const char *delim, char esc, char **p)
+{
+    size_t n = 0;
+    int escape = 0;
+
+    if (str == NULL) {
+        str = *p;
+    }
+    if (str == NULL) {
+        /* nothing left to tokenize */
+        return NULL;
+    }
+
+    for (*p = str; **p != '\0'; (*p)++) {
+        if (!escape && strchr(delim, **p)) {
+            str[n] = '\0';
+            (*p)++;
+            return str;
+        }
+
+        /* an unescaped @esc arms the flag; any other character clears it */
+        escape = (!escape && **p == esc);
+
+        if (!escape) {
+            str[n++] = **p;
+        }
+    }
+
+    str[n] = '\0';
+    *p = NULL;
+    return str;
+}
+
+/*
+ * strdup with escaping support.
+ *
+ * Returns a copy of @str, allocated with g_malloc(), in which every
+ * character contained in @delim and every occurrence of @esc is preceded
+ * by @esc.  All other characters are copied unchanged.
+ */
+char *estrdup(const char *str, const char *delim, char esc)
+{
+    size_t len = 0, extra = 0, out = 0;
+    const char *p;
+    char *ret;
+
+    /* first pass: count input characters and the escapes to insert */
+    for (p = str; *p != '\0'; p++) {
+        len++;
+        if (strchr(delim, *p) || *p == esc) {
+            extra++;
+        }
+    }
+
+    /* one byte per input character, one per inserted escape, plus NUL */
+    ret = g_malloc(len + extra + 1);
+
+    /* second pass: copy, prefixing special characters with @esc */
+    for (p = str; *p != '\0'; p++) {
+        if (strchr(delim, *p) || *p == esc) {
+            ret[out++] = esc;
+        }
+        ret[out++] = *p;
+    }
+
+    ret[out] = '\0';
+    return ret;
+}
+
int strstart(const char *str, const char *val, const char **ptr)
{
const char *p, *q;
diff --git a/docs/blkmirror.txt b/docs/blkmirror.txt
new file mode 100644
index 0000000..c9967eb
--- /dev/null
+++ b/docs/blkmirror.txt
@@ -0,0 +1,15 @@
+Block mirror driver
+-------------------
+
+This driver will mirror writes to two distinct images.
+It's used internally by live block copy.
+
+Format
+------
+
+blkmirror:fmt1:/image1.img:fmt2:/image2.img
+
+A backslash '\' escapes a colon so that it is not treated as
+a separator. A literal backslash must itself be escaped
+as '\\'.
+
diff --git a/qemu-common.h b/qemu-common.h
index c5e9cad..af9621f 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -122,6 +122,8 @@ int qemu_timedate_diff(struct tm *tm);
/* cutils.c */
void pstrcpy(char *buf, int buf_size, const char *str);
char *pstrcat(char *buf, int buf_size, const char *s);
+char *estrtok_r(char *str, const char *delim, char esc, char **p);
+char *estrdup(const char *str, const char *delim, char esc);
int strstart(const char *str, const char *val, const char **ptr);
int stristart(const char *str, const char *val, const char **ptr);
int qemu_strnlen(const char *s, int max_len);
--
1.7.1
- Re: [Qemu-devel] drive transactions (was Re: [PATCH 2/2 v2] Add the blockdev-reopen and blockdev-migrate commands), (continued)
Re: [Qemu-devel] Live Block Migration using Mirroring, Paolo Bonzini, 2012/02/28
[Qemu-devel] [PATCHv3] Add blkmirror block driver, Federico Simoncelli, 2012/02/29
[Qemu-devel] [PATCHv4] Add blkmirror block driver,
Federico Simoncelli <=