qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH] iSCSI support


From: Stefan Hajnoczi
Subject: Re: [Qemu-devel] [PATCH] iSCSI support
Date: Sat, 19 Feb 2011 15:34:16 +0000

On Fri, Feb 4, 2011 at 4:37 AM,  <address@hidden> wrote:
> From: Ronnie Sahlberg <address@hidden>
>
> This patch adds a new block driver : block.iscsi.c
> This driver interfaces with the multiplatform posix library
> for iscsi initiator/client access to iscsi devices hosted at
> git://github.com/sahlberg/libiscsi.git
>
> The patch adds the driver to interface with the iscsi library.
> It also updated the configure script to
> * by default, probe is libiscsi is available and if so, build
>  qemu against libiscsi.
> * --enable-libiscsi
>  Force a build against libiscsi. If libiscsi is not available
>  the build will fail.
> * --disable-libiscsi
>  Do not link against libiscsi, even if it is available.
>
> When linked with libiscsi, qemu gains support to access iscsi resources
> such as disks and cdrom directly, without having to make the devices visible
> to the host.
>
> You can specify devices using a iscsi url of the form :
> iscsi://[<username>%<password>@]<host>[:<port]/<target-iqn-name>/<lun>

>From what I can tell there is no standard iSCSI URI scheme but it's
worth checking before we invent our own.

I thought that '%' is an escape character in URIs.  Are you sure it's
valid to use it as a delimeter between the username and password?

There is a security risk in passing the password on the QEMU
command-line since other users will be able to see it in ps(1) output.
 Encrypted image files (e.g. qcow2) require keys and this is closest
to a mechanism for securely providing the password either via QMP or
interactively.  That would be worth looking into.

>
> Example:
> -drive file=iscsi://10.1.1.1:3260/iqn.ronnie.test/1
>
> -cdrom iscsi://10.1.1.1:3260/iqn.ronnie.test/2
>
> Signed-off-by: Ronnie Sahlberg <address@hidden>
> ---
>  Makefile.objs |    1 +
>  block/iscsi.c |  542 
> +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  configure     |   31 ++++
>  trace-events  |    7 +
>  4 files changed, 581 insertions(+), 0 deletions(-)
>  create mode 100644 block/iscsi.c
>
> diff --git a/Makefile.objs b/Makefile.objs
> index f1c7bfe..a62fcdd 100644
> --- a/Makefile.objs
> +++ b/Makefile.objs
> @@ -25,6 +25,7 @@ block-nested-y += qed-check.o
>  block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
>  block-nested-$(CONFIG_WIN32) += raw-win32.o
>  block-nested-$(CONFIG_POSIX) += raw-posix.o
> +block-nested-$(CONFIG_LIBISCSI) += iscsi.o
>  block-nested-$(CONFIG_CURL) += curl.o
>  block-nested-$(CONFIG_RBD) += rbd.o
>
> diff --git a/block/iscsi.c b/block/iscsi.c
> new file mode 100644
> index 0000000..8cb1c6f
> --- /dev/null
> +++ b/block/iscsi.c
> @@ -0,0 +1,542 @@
> +/*
> + * QEMU Block driver for iSCSI images
> + *
> + * Copyright (c) 2010 Ronnie Sahlberg <address@hidden>
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to 
> deal
> + * in the Software without restriction, including without limitation the 
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "config-host.h"
> +
> +#include <poll.h>
> +#include "sysemu.h"
> +#include "qemu-common.h"
> +#include "qemu-error.h"
> +#include "block_int.h"
> +#include "trace.h"
> +
> +#include <iscsi/iscsi.h>
> +#include <iscsi/scsi-lowlevel.h>
> +
> +
> +typedef struct IscsiLun {
> +    struct iscsi_context *iscsi;
> +    int lun;
> +    int block_size;
> +    unsigned long num_blocks;
> +} IscsiLun;
> +
> +typedef struct IscsiAIOCB {
> +    BlockDriverAIOCB common;
> +    QEMUIOVector *qiov;
> +    QEMUBH *bh;
> +    IscsiLun *iscsilun;
> +    uint8_t *buf;
> +    int canceled;
> +    int status;
> +    size_t read_size;
> +} IscsiAIOCB;
> +
> +struct IscsiTask {
> +    IscsiLun *iscsilun;
> +    int status;
> +    int complete;
> +};
> +
> +static int
> +iscsi_is_inserted(BlockDriverState *bs)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct iscsi_context *iscsi = iscsilun->iscsi;
> +
> +    return iscsi_is_logged_in(iscsi);
> +}

Why is this needed?

> +
> +
> +static void
> +iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
> +{
> +    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
> +
> +    acb->status = -EIO;

-ECANCELED?

> +static void
> +iscsi_readv_writev_bh_cb(void *p)
> +{
> +    IscsiAIOCB *acb = p;
> +
> +    qemu_bh_delete(acb->bh);
> +
> +    acb->common.cb(acb->common.opaque, acb->status);

I think we should double-check that acb->status != -ECANCELED here
just in case we scheduled the BH but someone cancelled the request
before the BH was executed.  This would be a really odd case but I
think it's theoretically possible.

> +static BlockDriverAIOCB *
> +iscsi_aio_writev(BlockDriverState *bs, int64_t sector_num,
> +                 QEMUIOVector *qiov, int nb_sectors,
> +                 BlockDriverCompletionFunc *cb,
> +                 void *opaque)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct iscsi_context *iscsi = iscsilun->iscsi;
> +    IscsiAIOCB *acb;
> +    size_t size;
> +    int ret;
> +
> +    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
> +    trace_iscsi_aio_writev(iscsi, sector_num, nb_sectors, opaque, acb);
> +    if (!acb) {
> +        return NULL;
> +    }
> +
> +    acb->iscsilun = iscsilun;
> +    acb->qiov     = qiov;
> +
> +    acb->canceled   = 0;
> +
> +    /* XXX we should pass the iovec to write10 to avoid the extra copy */
> +    /* this will allow us to get rid of 'buf' completely */
> +    size = nb_sectors * BDRV_SECTOR_SIZE;
> +    acb->buf = qemu_malloc(size);
> +    qemu_iovec_to_buffer(acb->qiov, acb->buf);
> +    ret = iscsi_write10_async(iscsi, iscsilun->lun, acb->buf, size,
> +                              sector_qemu2lun(sector_num, iscsilun),
> +                              0, 0, iscsilun->block_size,
> +                              iscsi_aio_write10_cb, acb);

We're not obeying LUN block size here.  In order to do that we need
read-write-modify.  See comments on iscsi_aio_readv() below.

> +    if (ret != 0) {
> +        error_report("iSCSI: Failed to send write10 command. %s",
> +                     iscsi_get_error(iscsi));
> +        qemu_free(acb->buf);
> +        qemu_aio_release(acb);
> +        return NULL;
> +    }
> +
> +    iscsi_set_events(iscsilun);
> +
> +    return &acb->common;
> +}
> +
> +static void
> +iscsi_aio_read10_cb(struct iscsi_context *iscsi, int status,
> +                    void *command_data, void *opaque)
> +{
> +    IscsiAIOCB *acb = opaque;
> +    struct scsi_task *scsi = command_data;
> +
> +    trace_iscsi_aio_read10_cb(iscsi, status, acb, acb->canceled);
> +
> +    if (acb->canceled != 0) {
> +        qemu_aio_release(acb);
> +        return;
> +    }
> +
> +    acb->status = 0;
> +    if (status < 0) {
> +        error_report("Failed to read10 data from iSCSI lun. %s",
> +                     iscsi_get_error(iscsi));
> +        acb->status = -EIO;
> +    } else {
> +        qemu_iovec_from_buffer(acb->qiov, scsi->datain.data, acb->read_size);

Need to offset from sector_num rounded down to LUN block size.

> +    }
> +
> +    iscsi_schedule_bh(iscsi_readv_writev_bh_cb, acb);
> +}
> +
> +static BlockDriverAIOCB *
> +iscsi_aio_readv(BlockDriverState *bs, int64_t sector_num,
> +                QEMUIOVector *qiov, int nb_sectors,
> +                BlockDriverCompletionFunc *cb,
> +                void *opaque)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct iscsi_context *iscsi = iscsilun->iscsi;
> +    IscsiAIOCB *acb;
> +    size_t qemu_read_size, lun_read_size;
> +    int ret;
> +
> +    qemu_read_size = BDRV_SECTOR_SIZE * (size_t)nb_sectors;
> +    lun_read_size  = (qemu_read_size + iscsilun->block_size - 1)
> +      / iscsilun->block_size * iscsilun->block_size;
> +
> +    acb = qemu_aio_get(&iscsi_aio_pool, bs, cb, opaque);
> +    trace_iscsi_aio_readv(iscsi, sector_num, nb_sectors, opaque, acb);
> +    if (!acb) {
> +        return NULL;
> +    }
> +
> +    acb->iscsilun = iscsilun;
> +    acb->qiov     = qiov;
> +
> +    acb->canceled   = 0;
> +    acb->read_size  = qemu_read_size;
> +    acb->buf        = NULL;
> +
> +    ret = iscsi_read10_async(iscsi, iscsilun->lun,
> +                             sector_qemu2lun(sector_num, iscsilun),
> +                             lun_read_size, iscsilun->block_size,
> +                             iscsi_aio_read10_cb, acb);

lba and datalen seem incorrect here.  lun_read_size has been rounded
up to the next LUN block.  But sector_num might not be LUN block
aligned so we need to extend lun_read_size even further in order to
round down lba to LUN block size.

The cleanest way to support this would be for BlockDriverState to
supply a block size but we currently keep that separate in BlockConf
and have it set from device emulation (e.g. scsi-disk.c) code.  So in
the near term we need to emulated BDRV_SECTOR_SIZE accesses.

> +    if (ret != 0) {
> +        error_report("iSCSI: Failed to send read10 command. %s",
> +                     iscsi_get_error(iscsi));
> +        qemu_aio_release(acb);
> +        return NULL;
> +    }
> +
> +    iscsi_set_events(iscsilun);
> +
> +    return &acb->common;
> +}
> +
> +
> +static void synccache10_cb(struct iscsi_context *iscsi, int status,
> +                           void *command_data, void *opaque)
> +{
> +    struct IscsiTask *task = opaque;
> +
> +    task->status   = status;
> +    task->complete = 1;
> +}
> +
> +static int
> +iscsi_flush(BlockDriverState *bs)
> +{
> +    IscsiLun *iscsilun = bs->opaque;
> +    struct iscsi_context *iscsi = iscsilun->iscsi;
> +    struct IscsiTask task;
> +    int ret;
> +
> +    task.iscsilun = iscsilun;
> +    task.status = 0;
> +    task.complete = 0;
> +
> +    ret = iscsi_synchronizecache10_async(iscsi, iscsilun->lun,
> +                                         0, 0, 0, 0,
> +                                         synccache10_cb,
> +                                         &task);
> +    if (ret != 0) {
> +        error_report("iSCSI: Failed to send SYNCHRONIZECACHE10. %s",
> +                     iscsi_get_error(iscsi));
> +        return -1;

Please use an errno here since this function returns a negative errno value.

> +static BlockDriver bdrv_iscsi = {
> +    .format_name     = "iscsi",
> +    .protocol_name   = "iscsi",
> +
> +    .instance_size   = sizeof(IscsiLun),
> +    .bdrv_file_open  = iscsi_open,
> +    .bdrv_close      = iscsi_close,
> +    .bdrv_flush      = iscsi_flush,
> +
> +    .bdrv_getlength  = iscsi_getlength,
> +
> +    .bdrv_aio_readv  = iscsi_aio_readv,
> +    .bdrv_aio_writev = iscsi_aio_writev,

Please implement .bdrv_aio_flush.  Synchronous functions like
.bdrv_flush will block the VM so asynchronous functions are much
preferred.

Stefan



reply via email to

[Prev in Thread] Current Thread [Next in Thread]