[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC PATCH 2/2] block: gluster as block backend
From: Bharata B Rao
Subject: [Qemu-devel] [RFC PATCH 2/2] block: gluster as block backend
Date: Sat, 21 Jul 2012 14:01:59 +0530
User-agent: Mutt/1.5.21 (2010-09-15)
block: gluster as block backend
From: Bharata B Rao <address@hidden>
This patch adds gluster as the new block backend in QEMU. This gives QEMU
the ability to boot VM images from gluster volumes.
Signed-off-by: Bharata B Rao <address@hidden>
---
block/Makefile.objs | 1
block/gluster.c | 483 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 484 insertions(+), 0 deletions(-)
create mode 100644 block/gluster.c
diff --git a/block/Makefile.objs b/block/Makefile.objs
index b5754d3..a1ae67f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,3 +9,4 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
block-obj-$(CONFIG_CURL) += curl.o
block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_GLUSTERFS) += gluster.o
diff --git a/block/gluster.c b/block/gluster.c
new file mode 100644
index 0000000..c33a006
--- /dev/null
+++ b/block/gluster.c
@@ -0,0 +1,483 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * (AIO implementation is derived from block/rbd.c)
+ *
+ * Copyright (C) 2012 Bharata B Rao <address@hidden>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "block_int.h"
+#include <glusterfs/api/glfs.h>
+
+/*
+ * Connection parameters parsed out of the "file=" string by
+ * qemu_gluster_parsename() and consumed by qemu_gluster_init().
+ */
+typedef struct GlusterConf {
+ char server[HOST_NAME_MAX]; /* gluster volfile server hostname/IP */
+ int port; /* volfile server port; 0 = library default */
+ char volname[128]; /* TODO: use GLUSTERD_MAX_VOLUME_NAME */
+ char image[PATH_MAX]; /* path of the image within the volume */
+} GlusterConf;
+
+/*
+ * Per-request AIO control block, allocated from gluster_aio_pool.
+ */
+typedef struct GlusterAIOCB {
+ BlockDriverAIOCB common; /* must be first: generic AIOCB header */
+ QEMUIOVector *qiov;
+ char *bounce; /* NOTE(review): never used in this patch */
+ struct BDRVGlusterState *s;
+ int cancelled; /* checked in qemu_gluster_complete_aio(); never set
+ anywhere in this patch -- relies on the AIO pool
+ zeroing it? TODO confirm */
+} GlusterAIOCB;
+
+/*
+ * Completion record handed to the glfs_*_async() callbacks and then
+ * shipped (as a pointer) over the notification pipe to the QEMU thread.
+ * Freed by qemu_gluster_complete_aio().
+ */
+typedef struct GlusterCBKData {
+ GlusterAIOCB *acb;
+ struct BDRVGlusterState *s;
+ int64_t size; /* expected byte count of the request (0 for flush) */
+ int ret; /* bytes transferred, or negative errno */
+} GlusterCBKData;
+
+/*
+ * Per-BlockDriverState gluster connection state (bs->opaque).
+ */
+typedef struct BDRVGlusterState {
+ struct glfs *glfs; /* libgfapi connection handle */
+ int fds[2]; /* pipe used to bounce completions into the
+ QEMU AIO thread (see defines below) */
+ int open_flags;
+ struct glfs_fd *fd; /* open handle on the image file */
+ int qemu_aio_count; /* in-flight requests, read by the flush cb */
+ int event_reader_pos; /* bytes of the next pointer read so far */
+ GlusterCBKData *event_gcbk; /* pointer being reassembled from the pipe */
+} BDRVGlusterState;
+
+/* Index of the read/write end of s->fds */
+#define GLUSTER_FD_READ 0
+#define GLUSTER_FD_WRITE 1
+
+/*
+ * Complete one AIO request in the QEMU thread: map the raw glfs result
+ * to the 0 / -errno convention, invoke the caller's completion callback
+ * (unless the request was cancelled) and release acb and gcbk.
+ */
+static void qemu_gluster_complete_aio(GlusterCBKData *gcbk)
+{
+ GlusterAIOCB *acb = gcbk->acb;
+ int ret;
+
+ if (acb->cancelled) {
+ /* Cancelled requests are released without running the callback */
+ qemu_aio_release(acb);
+ goto done;
+ }
+
+ if (gcbk->ret == gcbk->size) {
+ ret = 0; /* Success */
+ } else if (gcbk->ret < 0) {
+ ret = gcbk->ret; /* Read/Write failed */
+ } else {
+ ret = -EINVAL; /* Partial read/write - fail it */
+ }
+ acb->common.cb(acb->common.opaque, ret);
+ qemu_aio_release(acb);
+
+done:
+ g_free(gcbk);
+}
+
+/*
+ * AIO fd handler for the read end of the notification pipe.
+ * Reassembles one GlusterCBKData pointer (which may arrive split across
+ * several short reads, tracked by event_reader_pos) and completes the
+ * corresponding request. Retries reads interrupted by EINTR; returns
+ * when the non-blocking pipe runs dry.
+ */
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+ BDRVGlusterState *s = opaque;
+ ssize_t ret;
+
+ do {
+ char *p = (char *)&s->event_gcbk;
+
+ ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
+ sizeof(s->event_gcbk) - s->event_reader_pos);
+ if (ret > 0) {
+ s->event_reader_pos += ret;
+ if (s->event_reader_pos == sizeof(s->event_gcbk)) {
+ /* Full pointer assembled: complete the request */
+ s->event_reader_pos = 0;
+ qemu_gluster_complete_aio(s->event_gcbk);
+ s->qemu_aio_count--;
+ }
+ }
+ } while (ret < 0 && errno == EINTR);
+}
+
+/*
+ * AIO flush callback: report whether this driver still has requests
+ * in flight, so qemu_aio_wait() knows to keep polling our pipe fd.
+ */
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+ BDRVGlusterState *s = opaque;
+
+ return (s->qemu_aio_count > 0);
+}
+
+/*
+ * Parse a filename of the form
+ * file=protocol:server[@port]:volname:image
+ * into *c. The port is optional; when absent, c->port is set to 0.
+ * Returns 0 on success, -EINVAL on any malformed/missing component.
+ */
+static int qemu_gluster_parsename(GlusterConf *c, const char *filename)
+{
+ char *file = g_strdup(filename);
+ char *token, *next_token, *saveptr;
+ char *token_s, *next_token_s, *saveptr_s;
+ int ret = -EINVAL;
+
+ /* Discard the protocol */
+ token = strtok_r(file, ":", &saveptr);
+ if (!token) {
+ goto out;
+ }
+
+ /* server[@port] */
+ next_token = strtok_r(NULL, ":", &saveptr);
+ if (!next_token) {
+ goto out;
+ }
+ if (strchr(next_token, '@')) {
+ token_s = strtok_r(next_token, "@", &saveptr_s);
+ if (!token_s) {
+ goto out;
+ }
+ /* NOTE(review): strncpy does not NUL-terminate when the source is
+ * >= HOST_NAME_MAX chars; consider pstrcpy/snprintf instead. */
+ strncpy(c->server, token_s, HOST_NAME_MAX);
+ next_token_s = strtok_r(NULL, "@", &saveptr_s);
+ if (!next_token_s) {
+ goto out;
+ }
+ /* NOTE(review): atoi() cannot report errors; "foo" parses as 0.
+ * strtol() with endptr checking would reject bad ports. */
+ c->port = atoi(next_token_s);
+ } else {
+ strncpy(c->server, next_token, HOST_NAME_MAX);
+ c->port = 0;
+ }
+
+ /* volname */
+ next_token = strtok_r(NULL, ":", &saveptr);
+ if (!next_token) {
+ goto out;
+ }
+ /* NOTE(review): same strncpy termination concern as above */
+ strncpy(c->volname, next_token, 128);
+
+ /* image */
+ next_token = strtok_r(NULL, ":", &saveptr);
+ if (!next_token) {
+ goto out;
+ }
+ strncpy(c->image, next_token, PATH_MAX);
+ ret = 0;
+out:
+ g_free(file);
+ return ret;
+}
+
+/*
+ * Parse @filename into @c and bring up a libgfapi connection to the
+ * volume: glfs_new + set_volfile_server + set_logging + glfs_init.
+ * Returns the connected glfs handle, or NULL on failure (errno is set
+ * explicitly only for the parse error; otherwise we rely on the failing
+ * glfs_* call having set it -- TODO confirm libgfapi guarantees this).
+ */
+static struct glfs *qemu_gluster_init(GlusterConf *c, const char *filename)
+{
+ struct glfs *glfs = NULL;
+ int ret;
+
+ ret = qemu_gluster_parsename(c, filename);
+ if (ret < 0) {
+ errno = -ret;
+ goto out;
+ }
+
+ glfs = glfs_new(c->volname);
+ if (!glfs) {
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(glfs, "socket", c->server, c->port);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /*
+ * TODO: Logging is not necessary but instead nice to have.
+ * Can QEMU optionally log into a standard place ?
+ * Need to use defines like gf_loglevel_t:GF_LOG_INFO instead of
+ * hard coded values like 7 here.
+ */
+ ret = glfs_set_logging(glfs, "/tmp/qemu-gluster.log", 7);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = glfs_init(glfs);
+ if (ret < 0) {
+ goto out;
+ }
+ return glfs;
+
+out:
+ if (glfs) {
+ (void)glfs_fini(glfs);
+ }
+ return NULL;
+}
+
+/*
+ * BlockDriver .bdrv_file_open: connect to the volume, open the image
+ * with flags derived from @bdrv_flags, and set up the non-blocking
+ * completion pipe plus its AIO fd handler.
+ * Returns 0 on success, -errno on failure.
+ */
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+ int bdrv_flags)
+{
+ BDRVGlusterState *s = bs->opaque;
+ GlusterConf *c = g_malloc(sizeof(GlusterConf));
+ int ret;
+
+ s->glfs = qemu_gluster_init(c, filename);
+ if (!s->glfs) {
+ ret = -errno;
+ goto out;
+ }
+
+ /* Translate BDRV_O_* flags into POSIX open flags for glfs_open() */
+ s->open_flags |= O_BINARY;
+ s->open_flags &= ~O_ACCMODE;
+ if (bdrv_flags & BDRV_O_RDWR) {
+ s->open_flags |= O_RDWR;
+ } else {
+ s->open_flags |= O_RDONLY;
+ }
+
+ if ((bdrv_flags & BDRV_O_NOCACHE)) {
+ s->open_flags |= O_DIRECT;
+ }
+
+ s->fd = glfs_open(s->glfs, c->image, s->open_flags);
+ if (!s->fd) {
+ ret = -errno;
+ goto out;
+ }
+
+ /* Pipe that gluster callback threads use to notify the QEMU thread */
+ ret = qemu_pipe(s->fds);
+ if (ret < 0) {
+ goto out;
+ }
+ fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
+ fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
+ qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+ qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+ g_free(c);
+ return ret;
+
+out:
+ /* NOTE(review): s->fd/s->glfs are left dangling (not reset to NULL)
+ * after being closed here -- harmless only if the caller never touches
+ * them again after a failed open; worth confirming. */
+ g_free(c);
+ if (s->fd) {
+ glfs_close(s->fd);
+ }
+ if (s->glfs) {
+ (void) glfs_fini(s->glfs);
+ }
+ return ret;
+}
+
+/*
+ * BlockDriver .bdrv_create: create @filename on the gluster volume and
+ * truncate it to the requested virtual size (BLOCK_OPT_SIZE option).
+ * Returns 0 on success, -errno on failure.
+ */
+static int qemu_gluster_create(const char *filename,
+ QEMUOptionParameter *options)
+{
+ struct glfs *glfs;
+ struct glfs_fd *fd;
+ GlusterConf *c = g_malloc(sizeof(GlusterConf));
+ int ret = 0;
+ int64_t total_size = 0;
+
+ glfs = qemu_gluster_init(c, filename);
+ if (!glfs) {
+ ret = -errno;
+ goto out;
+ }
+
+ /* Read out options; total_size is kept in 512-byte sectors */
+ while (options && options->name) {
+ if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+ total_size = options->value.n / BDRV_SECTOR_SIZE;
+ }
+ options++;
+ }
+
+ fd = glfs_creat(glfs, c->image, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY,
S_IRWXU);
+ if (!fd) {
+ ret = -errno;
+ } else {
+ if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+ ret = -errno;
+ }
+ /* close even after a failed truncate; either failure sets ret */
+ if (glfs_close(fd) != 0) {
+ ret = -errno;
+ }
+ }
+out:
+ g_free(c);
+ if (glfs) {
+ (void) glfs_fini(glfs);
+ }
+ return ret;
+}
+
+/* Allocation pool for GlusterAIOCB (no cancel hook is provided) */
+static AIOPool gluster_aio_pool = {
+ .aiocb_size = sizeof(GlusterAIOCB),
+};
+
+/*
+ * Write the gcbk pointer to the notification pipe, retrying on EINTR
+ * and select()ing for writability on EAGAIN (the pipe is non-blocking).
+ * Returns >= 0 on success, negative on unrecoverable write error.
+ * A pointer-sized write to a pipe is expected to be atomic (< PIPE_BUF),
+ * so short writes are not handled -- TODO confirm.
+ */
+static int qemu_gluster_send_pipe(BDRVGlusterState *s, GlusterCBKData *gcbk)
+{
+ int ret = 0;
+ while (1) {
+ fd_set wfd;
+ int fd = s->fds[GLUSTER_FD_WRITE];
+
+ ret = write(fd, (void *)&gcbk, sizeof(gcbk));
+ if (ret >= 0) {
+ break;
+ }
+ if (errno == EINTR) {
+ continue;
+ }
+ if (errno != EAGAIN) {
+ break;
+ }
+
+ /* Pipe full: wait until it is writable, then retry */
+ FD_ZERO(&wfd);
+ FD_SET(fd, &wfd);
+ do {
+ ret = select(fd + 1, NULL, &wfd, NULL, NULL);
+ } while (ret < 0 && errno == EINTR);
+ }
+ return ret;
+}
+
+/*
+ * Completion callback passed to glfs_*_async() (presumably invoked from
+ * a gluster thread, hence the pipe hand-off to the QEMU thread -- TODO
+ * confirm libgfapi's callback context). Records the result and queues
+ * the gcbk for qemu_gluster_aio_event_reader(); aborts if the pipe
+ * write fails, since the request could otherwise never complete.
+ */
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+ GlusterCBKData *gcbk = (GlusterCBKData *)arg;
+ BDRVGlusterState *s = gcbk->s;
+
+ gcbk->ret = ret;
+ if (qemu_gluster_send_pipe(s, gcbk) < 0) {
+ error_report("Could not complete read/write/flush from gluster");
+ abort();
+ }
+}
+
+/*
+ * Common read/write path: submit an async preadv/pwritev (selected by
+ * @write) for @nb_sectors starting at @sector_num. Returns the AIOCB,
+ * or NULL if submission failed (after undoing the in-flight count).
+ */
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+ int ret;
+ GlusterAIOCB *acb;
+ GlusterCBKData *gcbk;
+ BDRVGlusterState *s = bs->opaque;
+ size_t size;
+ off_t offset;
+
+ acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+ acb->qiov = qiov;
+ acb->s = s;
+ /* NOTE(review): acb->cancelled is never initialized here; a recycled
+ * AIOCB could carry a stale value into qemu_gluster_complete_aio(). */
+
+ offset = sector_num * BDRV_SECTOR_SIZE;
+ size = nb_sectors * BDRV_SECTOR_SIZE;
+ s->qemu_aio_count++;
+
+ gcbk = g_malloc(sizeof(GlusterCBKData));
+ gcbk->acb = acb;
+ gcbk->s = s;
+ gcbk->size = size; /* full transfer expected; anything less fails */
+
+ if (write) {
+ ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+ &gluster_finish_aiocb, gcbk);
+ } else {
+ ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+ &gluster_finish_aiocb, gcbk);
+ }
+
+ if (ret < 0) {
+ goto out;
+ }
+ return &acb->common;
+
+out:
+ g_free(gcbk);
+ s->qemu_aio_count--;
+ qemu_aio_release(acb);
+ return NULL;
+}
+
+/* BlockDriver .bdrv_aio_readv: read via the common rw path (write=0) */
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque,
0);
+}
+
+/* BlockDriver .bdrv_aio_writev: write via the common rw path (write=1) */
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque,
1);
+}
+
+/*
+ * BlockDriver .bdrv_aio_flush: submit an async fsync of the image.
+ * gcbk->size is 0 so a 0 return from glfs matches "full success" in
+ * qemu_gluster_complete_aio(). Returns NULL on submission failure.
+ */
+static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
+ BlockDriverCompletionFunc *cb, void *opaque)
+{
+ int ret;
+ GlusterAIOCB *acb;
+ GlusterCBKData *gcbk;
+ BDRVGlusterState *s = bs->opaque;
+
+ acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+ acb->s = s;
+ s->qemu_aio_count++;
+
+ gcbk = g_malloc(sizeof(GlusterCBKData));
+ gcbk->acb = acb;
+ gcbk->s = s;
+ gcbk->size = 0; /* flush transfers no data */
+
+ ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, gcbk);
+ if (ret < 0) {
+ goto out;
+ }
+ return &acb->common;
+
+out:
+ g_free(gcbk);
+ s->qemu_aio_count--;
+ qemu_aio_release(acb);
+ return NULL;
+}
+
+/*
+ * BlockDriver .bdrv_getlength: image size in bytes via glfs_fstat(),
+ * or -errno on failure.
+ */
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+ struct stat st;
+ int ret;
+
+ ret = glfs_fstat(s->fd, &st);
+ if (ret < 0) {
+ return -errno;
+ } else {
+ return st.st_size;
+ }
+}
+
+/*
+ * BlockDriver .bdrv_close: close the open image handle.
+ * NOTE(review): the notification pipe fds are not closed, the AIO fd
+ * handler is not unregistered, and s->glfs is never glfs_fini()'d --
+ * these leak per open/close cycle.
+ */
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+
+ if (s->fd) {
+ glfs_close(s->fd);
+ s->fd = NULL;
+ }
+}
+
+/* Options accepted by qemu_gluster_create(); only the virtual size */
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { NULL }
+};
+
+/* Driver table wiring the gluster: protocol to the functions above */
+static BlockDriver bdrv_gluster = {
+ .format_name = "gluster",
+ .protocol_name = "gluster",
+ .instance_size = sizeof(BDRVGlusterState),
+ .bdrv_file_open = qemu_gluster_open,
+ .bdrv_close = qemu_gluster_close,
+ .bdrv_create = qemu_gluster_create,
+ .bdrv_getlength = qemu_gluster_getlength,
+
+ .bdrv_aio_readv = qemu_gluster_aio_readv,
+ .bdrv_aio_writev = qemu_gluster_aio_writev,
+ .bdrv_aio_flush = qemu_gluster_aio_flush,
+
+ .create_options = qemu_gluster_create_options,
+};
+
+/* Registered at startup via block_init() */
+static void bdrv_gluster_init(void)
+{
+ bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init);