[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-block] [PATCH] tcmu: Introduce qemu-tcmu utility
From: |
Yaowei Bai |
Subject: |
[Qemu-block] [PATCH] tcmu: Introduce qemu-tcmu utility |
Date: |
Fri, 21 Dec 2018 18:16:27 +0800 |
This patch introduces a new utility, qemu-tcmu. Apart from the
underlying protocol, it interacts with the world much like
qemu-nbd. This patch is based on Fam's version.
Qemu-tcmu handles SCSI commands which are passed through userspace
from kernel by LIO subsystem using TCMU protocol. Libtcmu is the
library for processing TCMU protocol in userspace. With qemu-tcmu,
we can export images/formats like qcow2, rbd, etc. that qemu supports
using iSCSI protocol or loopback for remote or local access.
Currently qemu-tcmu implements several SCSI command helper functions
of its own in order to work. Our goal is to refactor and reuse the SCSI
code in scsi-disk. Please refer to docs/tcmu.txt for how to use qemu-tcmu.
We tested it on CentOS 7.3. (Please use kernel 3.10.0-514 or lower; there
is an issue with higher kernel versions that we are still resolving.)
Cc: Mike Christie <address@hidden>
Cc: Amar Tumballi<address@hidden>
Cc: Prasanna Kalever <address@hidden>
Cc: Paolo Bonzini <address@hidden>
Signed-off-by: Fam Zheng <address@hidden>
Signed-off-by: Yaowei Bai <address@hidden>
Signed-off-by: Xiubo Li <address@hidden>
---
Makefile | 1 +
Makefile.objs | 3 +-
configure | 45 ++++
docs/tcmu.txt | 91 +++++++
include/tcmu/tcmu.h | 14 +
qemu-tcmu.c | 214 +++++++++++++++
tcmu/Makefile.objs | 5 +
tcmu/helper.c | 741 ++++++++++++++++++++++++++++++++++++++++++++++++++++
tcmu/helper.h | 31 +++
tcmu/tcmu.c | 598 ++++++++++++++++++++++++++++++++++++++++++
tcmu/trace-events | 12 +
11 files changed, 1754 insertions(+), 1 deletion(-)
create mode 100644 docs/tcmu.txt
create mode 100644 include/tcmu/tcmu.h
create mode 100644 qemu-tcmu.c
create mode 100644 tcmu/Makefile.objs
create mode 100644 tcmu/helper.c
create mode 100644 tcmu/helper.h
create mode 100644 tcmu/tcmu.c
create mode 100644 tcmu/trace-events
diff --git a/Makefile b/Makefile
index 038780c..351e9d4 100644
--- a/Makefile
+++ b/Makefile
@@ -483,6 +483,7 @@ qemu-img.o: qemu-img-cmds.h
qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y)
$(qom-obj-y) $(COMMON_LDADDS)
qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y)
$(qom-obj-y) $(COMMON_LDADDS)
qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y)
$(qom-obj-y) $(COMMON_LDADDS)
+qemu-tcmu$(EXESUF): qemu-tcmu.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y)
$(qom-obj-y) $(COMMON_LDADDS)
qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o $(COMMON_LDADDS)
diff --git a/Makefile.objs b/Makefile.objs
index 56af034..8f96c42 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -26,7 +26,7 @@ block-obj-y += block/ scsi/
block-obj-y += qemu-io-cmds.o
block-obj-$(CONFIG_REPLICATION) += replication.o
-block-obj-m = block/
+block-obj-m = block/ tcmu/
#######################################################################
# crypto-obj-y is code used by both qemu system emulation and qemu-img
@@ -196,6 +196,7 @@ trace-events-subdirs += target/mips
trace-events-subdirs += target/ppc
trace-events-subdirs += target/s390x
trace-events-subdirs += target/sparc
+trace-events-subdirs += tcmu
trace-events-subdirs += ui
trace-events-subdirs += util
diff --git a/configure b/configure
index 224d307..d41e4e9 100755
--- a/configure
+++ b/configure
@@ -346,6 +346,7 @@ fdt=""
netmap="no"
sdl=""
sdlabi=""
+tcmu=""
virtfs=""
mpath=""
vnc="yes"
@@ -1034,6 +1035,10 @@ for opt do
# configure to be used by RPM and similar macros that set
# lots of directory switches by default.
;;
+ --enable-tcmu) tcmu="yes"
+ ;;
+ --disable-tcmu) tcmu="no"
+ ;;
--disable-sdl) sdl="no"
;;
--enable-sdl) sdl="yes"
@@ -3607,6 +3612,36 @@ else
fi
##########################################
+# tcmu support probe
+
+if test "$tcmu" != "no"; then
+  # Sanity check for gio-unix-2.0 (part of glib2), cannot fail unless something
+  # is very wrong.
+  if ! $pkg_config gio-unix-2.0; then
+    error_exit "glib is required to compile QEMU"
+  fi
+  # Detect libtcmu by compiling and linking a tiny program against -ltcmu.
+  cat > $TMPC <<EOF
+#include <stdio.h>
+#include <libtcmu.h>
+
+int main(int argc, char **argv)
+{
+    struct tcmulib_context *ctx = tcmulib_initialize(NULL, 0);
+    tcmulib_register(ctx);
+    return ctx != NULL;
+}
+EOF
+  if compile_prog "" "-ltcmu" ; then
+    tcmu=yes
+    # No extra compiler flags are needed.  Assign the variable explicitly so
+    # the TCMU_CFLAGS line written to $config_host_mak cannot pick up a stray
+    # value inherited from the environment.
+    tcmu_cflags=""
+    tcmu_libs="-ltcmu"
+  # Use the POSIX "=" operator: "==" is a bashism and makes test(1) fail
+  # under shells such as dash.
+  elif test "$tcmu" = "yes"; then
+    feature_not_found "libtcmu" "Install libtcmu devel (>=1.0.5)"
+  else
+    tcmu=no
+  fi
+fi
+
+##########################################
# libmpathpersist probe
if test "$mpath" != "no" ; then
@@ -5756,6 +5791,9 @@ if test "$want_tools" = "yes" ; then
if [ "$posix" = "yes" ] && [ "$curl" = "yes" ]; then
tools="elf2dmp $tools"
fi
+ if [ "$linux" = "yes" -a "$tcmu" = "yes" ] ; then
+ tools="qemu-tcmu\$(EXESUF) $tools"
+ fi
fi
if test "$softmmu" = yes ; then
if test "$linux" = yes; then
@@ -6142,6 +6180,7 @@ echo "capstone $capstone"
echo "docker $docker"
echo "libpmem support $libpmem"
echo "libudev $libudev"
+echo "tcmu support $tcmu"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -6782,6 +6821,12 @@ if test "$live_block_migration" = "yes" ; then
echo "CONFIG_LIVE_BLOCK_MIGRATION=y" >> $config_host_mak
fi
+if test "$tcmu" = "yes" ; then
+ echo "CONFIG_TCMU=m" >> $config_host_mak
+ echo "TCMU_CFLAGS=$tcmu_cflags" >> $config_host_mak
+ echo "TCMU_LIBS=$tcmu_libs" >> $config_host_mak
+fi
+
if test "$tpm" = "yes"; then
echo 'CONFIG_TPM=$(CONFIG_SOFTMMU)' >> $config_host_mak
# TPM passthrough support?
diff --git a/docs/tcmu.txt b/docs/tcmu.txt
new file mode 100644
index 0000000..ffe5f85
--- /dev/null
+++ b/docs/tcmu.txt
@@ -0,0 +1,91 @@
+Introduction
+-------------------------
+TCMU is the abbreviation of TCM in Userspace, and TCM is another
+name for LIO, an iSCSI target in the Linux kernel. TCM can serve
+files, block devices, RAM, etc. as storage backends for an iSCSI target
+entirely in the kernel. But userspace storage like GlusterFS and
+Ceph is hard for TCM to handle as backend storage. TCMU is used
+in this situation: it utilizes a UIO ring buffer to pass SCSI commands
+through to userspace, so a userspace program can process them by handling
+the TCMU protocol. Qemu-tcmu is such a userspace program; it can export
+any format/protocol that QEMU supports as an iSCSI target or loopback
+device by linking to libtcmu from tcmu-runner (a userspace helper daemon
+that handles the TCMU interfaces).
+
+Installation
+--------------------
+Qemu-tcmu depends on libtcmu/tcmu-runner to handle TCMU userspace
+interfaces and targetcli-fb and other utilities to manage ISCSI
+targets.
+
+1. install and config tcmu-runner
+
+ # git clone https://github.com/open-iscsi/tcmu-runner
+ # cd tcmu-runner
+ # cmake -DSUPPORT_SYSTEMD=ON -DCMAKE_INSTALL_PREFIX=/usr
+ # make install
+ # systemctl daemon-reload
+ # systemctl enable tcmu-runner
+ # systemctl start tcmu-runner
+
+2. install rtslib-fb
+
+ # git clone https://github.com/open-iscsi/rtslib-fb.git
+ # cd rtslib-fb
+ # python setup.py install
+
+3. install configshell-fb
+
+ # git clone https://github.com/open-iscsi/configshell-fb.git
+ # cd configshell-fb
+ # python setup.py install
+
+4. install targetcli-fb
+
+ # git clone https://github.com/open-iscsi/targetcli-fb.git
+ # cd targetcli-fb
+ # python setup.py install
+
+5. install qemu-tcmu
+
+ # git clone https://github.com/qemu/qemu.git
+ # cd qemu
+ # ./configure --target-list=x86_64-softmmu \
+ --enable-libiscsi \
+ --enable-tcmu
+ # make -j
+ # make -j install
+
+Now we can use qemu-tcmu to export images.
+
+1. create backend storage file
+
+ # qemu-img create test.file 1G
+
+2. load TCMU kernel module
+
+ # modprobe target_core_user
+
+3. start qemu-tcmu
+
+ # qemu-tcmu
+
+4. configure ISCSI target via targetcli
+
+ # IQN=iqn.2016-11.org.test:qemu-tcmu-test
+ # targetcli /backstores/user:qemu create qemulun 1G
@address@hidden/root/test.file
+ # targetcli /iscsi create $IQN
+ # targetcli /iscsi/$IQN/tpg1 set attribute \
+ authentication=0 \
+ generate_node_acls=1 \
+ demo_mode_write_protect=0 \
+ prod_mode_write_protect=0
+ # targetcli /iscsi/$IQN/tpg1/luns create /backstores/user:qemu/qemulun
+
+Then you can connect this exported target on another initiator host.
+
+Others
+------
+For more information about TCMU and tcmu-runner, please refer to
+Documentation/target/tcmu-design.txt in the Linux kernel and
+https://github.com/open-iscsi/tcmu-runner.
diff --git a/include/tcmu/tcmu.h b/include/tcmu/tcmu.h
new file mode 100644
index 0000000..656a545
--- /dev/null
+++ b/include/tcmu/tcmu.h
@@ -0,0 +1,14 @@
+#ifndef QEMU_TCMU_H
+#define QEMU_TCMU_H
+
+#include "qemu-common.h"
+
+/* Export handle; the structure itself is not defined in this header. */
+typedef struct TCMUExport TCMUExport;
+
+/* Option list for TCMU exports; defined outside this header. */
+extern QemuOptsList qemu_tcmu_export_opts;
+
+/*
+ * Start the TCMU handler using @subtype as the handler name (qemu-tcmu
+ * passes the value of its --handler-name option, "qemu" by default).
+ * Failures are reported through @errp.
+ */
+void qemu_tcmu_start(const char *subtype, Error **errp);
+
+/* Stop the TCMU handler; qemu-tcmu calls this once it is terminating. */
+void qemu_tcmu_stop(void);
+
+/*
+ * Create an export for @blk.
+ * NOTE(review): @writable presumably selects a read-write export -- confirm
+ * against the implementation.
+ */
+TCMUExport *tcmu_export_new(BlockBackend *blk, bool writable, Error **errp);
+
+/* NOTE(review): has the shape of a qemu_opts_foreach() callback -- confirm. */
+int export_init_func(void *opaque, QemuOpts *all_opts, Error **errp);
+
+#endif
diff --git a/qemu-tcmu.c b/qemu-tcmu.c
new file mode 100644
index 0000000..85e348f
--- /dev/null
+++ b/qemu-tcmu.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright 2016 Red Hat, Inc.
+ *
+ * TCMU Handler Program
+ *
+ * Authors:
+ * Fam Zheng <address@hidden>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "sysemu/block-backend.h"
+#include "block/block_int.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "qemu/config-file.h"
+#include "qemu/bswap.h"
+#include "qemu/log.h"
+#include "qemu/option.h"
+#include "block/snapshot.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
+#include "qom/object_interfaces.h"
+#include "crypto/init.h"
+#include "trace/control.h"
+#include "tcmu/tcmu.h"
+#include <getopt.h>
+#include "qemu-version.h"
+
+#define QEMU_TCMU_OPT_OBJECT 260
+
+static int verbose;
+static enum { RUNNING, TERMINATING, TERMINATED } state;
+
+/*
+ * Print the command-line help text to stdout.
+ *
+ * @name: program name substituted into the "Usage" line (main() passes
+ *        argv[0]).
+ */
+static void usage(const char *name)
+{
+    /*
+     * The parenthesised (printf) calls the function itself rather than any
+     * function-like macro of the same name.
+     */
+    (printf)(
+"Usage:\n"
+"%s [OPTIONS]\n"
+"QEMU TCMU Handler\n"
+"\n"
+" -h, --help display this help and exit\n"
+" -V, --version output version information and exit\n"
+"\n"
+"General purpose options:\n"
+" -v, --verbose display extra debugging information\n"
+" -x, --handler-name=NAME handler name to be used as the subtype for TCMU\n"
+" --object type,id=ID,... define an object such as 'secret' for providing\n"
+" passwords and/or encryption keys\n"
+" -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
+" specify tracing options\n"
+"\n"
+"Report bugs to <address@hidden>\n",
+        name);
+}
+
+/*
+ * Print "<name> v<QEMU version>" followed by a newline to stdout.
+ *
+ * @name: program name to print (main() passes argv[0]).
+ */
+static void version(const char *name)
+{
+    printf("%s v" QEMU_FULL_VERSION "\n",
+           name);
+}
+
+/*
+ * Signal handler installed by main() for SIGTERM and SIGINT.
+ *
+ * Atomically moves the global state from RUNNING to TERMINATING (leaving
+ * any other state untouched) and then calls qemu_notify_event().
+ *
+ * @signum: number of the delivered signal; not used.
+ */
+static void termsig_handler(int signum)
+{
+    (void)atomic_cmpxchg(&state, RUNNING, TERMINATING);
+
+    qemu_notify_event();
+}
+
+/*
+ * Option list backing the --object command-line option.  The first,
+ * unnamed value of the option string is taken as "qom-type"; the
+ * descriptor table is empty.
+ */
+static QemuOptsList qemu_object_opts = {
+    .name             = "object",
+    .implied_opt_name = "qom-type",
+    .desc             = {
+        { }
+    },
+    .head             = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
+};
+
+/*
+ * atexit() hook registered by main(): cancel all jobs synchronously, then
+ * close every block device.
+ */
+static void qemu_tcmu_shutdown(void)
+{
+    job_cancel_sync_all();
+
+    bdrv_close_all();
+}
+
+/*
+ * qemu-tcmu entry point.
+ *
+ * Parses the command line, creates the objects requested with --object,
+ * initialises tracing, the main loop and the block layer, and then runs the
+ * main loop until SIGTERM or SIGINT is received.  The TCMU handler is
+ * started after the first main loop iteration and stopped on termination.
+ *
+ * Exits with EXIT_SUCCESS after an orderly shutdown (and for --help and
+ * --version), and with EXIT_FAILURE on any command-line or initialisation
+ * error.
+ */
+int main(int argc, char **argv)
+{
+    const char *sopt = "hVvx:T:";
+    bool starting = true;
+    struct option lopt[] = {
+        { "help", no_argument, NULL, 'h' },
+        { "version", no_argument, NULL, 'V' },
+        { "verbose", no_argument, NULL, 'v' },
+        { "object", required_argument, NULL, QEMU_TCMU_OPT_OBJECT },
+        { "handler-name", required_argument, NULL, 'x' },
+        { "trace", required_argument, NULL, 'T' },
+        { NULL, 0, NULL, 0 }
+    };
+    int ch;
+    int opt_ind = 0;
+    Error *local_err = NULL;
+    char *trace_file = NULL;
+    const char *subtype = "qemu";
+
+    struct sigaction sa_sigterm;
+    memset(&sa_sigterm, 0, sizeof(sa_sigterm));
+    sa_sigterm.sa_handler = termsig_handler;
+    sigaction(SIGTERM, &sa_sigterm, NULL);
+    sigaction(SIGINT, &sa_sigterm, NULL);
+
+    module_call_init(MODULE_INIT_TRACE);
+    qcrypto_init(&error_fatal);
+
+    module_call_init(MODULE_INIT_QOM);
+    qemu_add_opts(&qemu_object_opts);
+    qemu_add_opts(&qemu_trace_opts);
+    qemu_init_exec_dir(argv[0]);
+
+    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
+        switch (ch) {
+        case 'x':
+            subtype = optarg;
+            break;
+        case 'v':
+            verbose = 1;
+            break;
+        case 'V':
+            version(argv[0]);
+            exit(EXIT_SUCCESS);
+        case 'h':
+            usage(argv[0]);
+            exit(EXIT_SUCCESS);
+        case '?':
+            error_report("Try `%s --help' for more information.", argv[0]);
+            exit(EXIT_FAILURE);
+        case QEMU_TCMU_OPT_OBJECT: {
+            QemuOpts *opts;
+            opts = qemu_opts_parse_noisily(&qemu_object_opts,
+                                           optarg, true);
+            if (!opts) {
+                exit(EXIT_FAILURE);
+            }
+        }   break;
+        case 'T':
+            /* A later --trace option replaces an earlier one. */
+            g_free(trace_file);
+            trace_file = trace_opt_parse(optarg);
+            break;
+        }
+    }
+
+    /* qemu-tcmu takes options only, no positional arguments. */
+    if ((argc - optind) != 0) {
+        error_report("Invalid number of arguments");
+        error_printf("Try `%s --help' for more information.\n", argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    if (qemu_opts_foreach(&qemu_object_opts,
+                          user_creatable_add_opts_foreach,
+                          NULL, NULL)) {
+        exit(EXIT_FAILURE);
+    }
+
+    if (!trace_init_backends()) {
+        exit(EXIT_FAILURE);
+    }
+    trace_init_file(trace_file);
+    qemu_set_log(LOG_TRACE);
+
+    if (qemu_init_main_loop(&local_err)) {
+        error_report_err(local_err);
+        exit(EXIT_FAILURE);
+    }
+    bdrv_init();
+    atexit(qemu_tcmu_shutdown);
+
+    /* now when the initialization is (almost) complete, chdir("/")
+     * to free any busy filesystems */
+    if (chdir("/") < 0) {
+        error_report("Could not chdir to root directory: %s",
+                     strerror(errno));
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * Do not assign RUNNING to 'state' here.  It is a zero-initialised
+     * static, i.e. already RUNNING, and the signal handlers installed at the
+     * top of this function may have moved it to TERMINATING in the meantime;
+     * overwriting it would silently drop that termination request.
+     */
+    do {
+        main_loop_wait(starting);
+        if (starting) {
+            qemu_tcmu_start(subtype, &local_err);
+            if (local_err) {
+                error_report_err(local_err);
+                exit(EXIT_FAILURE);
+            }
+            starting = false;
+        }
+        if (state == TERMINATING) {
+            state = TERMINATED;
+            qemu_tcmu_stop();
+        }
+    } while (state != TERMINATED);
+
+    exit(EXIT_SUCCESS);
+}
diff --git a/tcmu/Makefile.objs b/tcmu/Makefile.objs
new file mode 100644
index 0000000..9ffa5b9
--- /dev/null
+++ b/tcmu/Makefile.objs
@@ -0,0 +1,5 @@
+block-obj-$(CONFIG_TCMU) += tcmu.mo
+
+tcmu.mo-objs := tcmu.o helper.o
+tcmu.mo-cflags := $(TCMU_CFLAGS)
+tcmu.mo-libs := $(TCMU_LIBS)
diff --git a/tcmu/helper.c b/tcmu/helper.c
new file mode 100644
index 0000000..0b86b4d
--- /dev/null
+++ b/tcmu/helper.c
@@ -0,0 +1,741 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 2.1 or any later version (LGPLv2.1 or
+ * later), or the Apache License 2.0.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "scsi/constants.h"
+#include "libtcmu.h"
+#include "helper.h"
+
+/*
+ * Build the 36-byte standard INQUIRY response and copy it into the
+ * caller's iovec.
+ *
+ * @cdb:     the INQUIRY CDB; not examined here.
+ * @iovec:   destination scatter list.
+ * @iov_cnt: number of entries in @iovec.
+ *
+ * Always returns TCMU_STS_OK.
+ */
+static int tcmu_emulate_std_inquiry(
+    uint8_t *cdb,
+    struct iovec *iovec,
+    size_t iov_cnt)
+{
+    uint8_t resp[36] = { 0 };
+
+    resp[2] = 0x05; /* SPC-3 */
+    resp[3] = 0x02; /* response data format */
+    resp[4] = 31;   /* additional length: 36 bytes total minus the first 5 */
+
+    /* 3PC: advertise Third-Party Copy so that XCOPY is enabled. */
+    resp[5] = 0x08;
+
+    resp[7] = 0x02; /* CmdQue */
+
+    /* Vendor (8 bytes), space-padded product (16 bytes), revision (4 bytes). */
+    memcpy(&resp[8], "LIO-ORG ", 8);
+    memcpy(&resp[16], "TCMU device" "     ", 16);
+    memcpy(&resp[32], "0002", 4);
+
+    tcmu_memcpy_into_iovec(iovec, iov_cnt, resp, sizeof(resp));
+    return TCMU_STS_OK;
+}
+
+/*
+ * Convert one hexadecimal digit character to its numeric value.
+ * (Derived from CCAN str/hex/hex.c, Public Domain.)
+ *
+ * @val: receives the value 0..15; left untouched when @c is not a hex digit.
+ * @c:   character to convert; both upper and lower case are accepted.
+ *
+ * Returns true when @c is a hexadecimal digit, false otherwise.
+ */
+static bool char_to_hex(unsigned char *val, char c)
+{
+    unsigned char digit;
+
+    if (c >= '0' && c <= '9') {
+        digit = c - '0';
+    } else if (c >= 'a' && c <= 'f') {
+        digit = c - 'a' + 10;
+    } else if (c >= 'A' && c <= 'F') {
+        digit = c - 'A' + 10;
+    } else {
+        return false;
+    }
+
+    *val = digit;
+    return true;
+}
+
+/*
+ * Copy the NUL-terminated string @str into @dst, which has room for @cap
+ * bytes, always NUL-terminating the result when @cap is non-zero.
+ *
+ * Returns the number of characters actually stored, excluding the NUL.
+ * Unlike the raw snprintf() return value this never exceeds @cap - 1, so it
+ * can be used directly to compute lengths.  A NULL @str is treated as "".
+ */
+static size_t tcmu_vpd_put_string(char *dst, size_t cap, const char *str)
+{
+    int n;
+
+    if (!cap) {
+        return 0;
+    }
+
+    n = snprintf(dst, cap, "%s", str ? str : "");
+    if (n < 0) {
+        dst[0] = '\0';
+        return 0;
+    }
+
+    return (size_t)n < cap ? (size_t)n : cap - 1;
+}
+
+/*
+ * Emulate INQUIRY with the EVPD bit set: build the Vital Product Data page
+ * selected by cdb[2] and copy it into the caller's iovec.
+ *
+ * Supported pages: 00h (supported pages), 80h (unit serial number),
+ * 83h (device identification), B0h (block limits), B1h (block device
+ * characteristics) and B2h (logical block provisioning).
+ *
+ * @dev:     device the command is addressed to.
+ * @cdb:     the INQUIRY CDB.
+ * @iovec:   destination scatter list.
+ * @iov_cnt: number of entries in @iovec.
+ *
+ * Returns TCMU_STS_OK on success, TCMU_STS_HW_ERR when the device WWN
+ * cannot be read, or TCMU_STS_INVALID_CDB for an unsupported page code.
+ */
+static int tcmu_emulate_evpd_inquiry(
+    struct tcmu_device *dev,
+    uint8_t *cdb,
+    struct iovec *iovec,
+    size_t iov_cnt)
+{
+    switch (cdb[2]) {
+    case 0x0: /* Supported VPD pages */
+    {
+        uint8_t data[16] = { 0 };
+
+        /* data[1] (page code) already 0 */
+        /*
+         * spc4r22 7.7.13 The supported VPD page list shall contain
+         * a list of all VPD page codes (see 7.7) implemented by the
+         * logical unit in ascending order beginning with page code 00h
+         */
+        data[4] = 0x00;
+        data[5] = 0x80;
+        data[6] = 0x83;
+        data[7] = 0xb0;
+        data[8] = 0xb1;
+        data[9] = 0xb2;
+
+        /* PAGE LENGTH: the six page codes above */
+        data[3] = 6;
+
+        tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data));
+        return TCMU_STS_OK;
+    }
+    case 0x80: /* Unit Serial Number */
+    {
+        uint8_t data[512] = { 0 };
+        char *wwn;
+        size_t len;
+
+        data[1] = 0x80;
+
+        wwn = tcmu_cfgfs_dev_get_wwn(dev);
+        if (!wwn) {
+            return TCMU_STS_HW_ERR;
+        }
+
+        /*
+         * The kernel limits unit_serial to 254 bytes.  Cap the copy at
+         * 254 characters plus NUL so that the one-byte PAGE LENGTH field
+         * (string length + 1) can never wrap around.
+         */
+        len = tcmu_vpd_put_string((char *)&data[4], 255, wwn);
+        data[3] = len + 1;
+
+        tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data));
+
+        free(wwn);
+        return TCMU_STS_OK;
+    }
+    case 0x83: /* Device identification */
+    {
+        uint8_t data[512] = { 0 };
+        uint8_t *ptr;
+        char *p, *wwn;
+        size_t len, cap, used = 0;
+        uint16_t tot_len;
+        bool next;
+        int i;
+
+        data[1] = 0x83;
+
+        wwn = tcmu_cfgfs_dev_get_wwn(dev);
+        if (!wwn) {
+            return TCMU_STS_HW_ERR;
+        }
+
+        /* Descriptors start right after the 4-byte page header. */
+        ptr = &data[4];
+
+        /* 1/3: T10 Vendor id */
+        ptr[0] = 2; /* code set: ASCII */
+        ptr[1] = 1; /* identifier: T10 vendor id */
+        memcpy(&ptr[4], "LIO-ORG ", 8);
+        /*
+         * The one-byte designator length covers the 8-byte vendor id, the
+         * WWN string and its NUL, so at most 246 characters of WWN fit.
+         */
+        len = tcmu_vpd_put_string((char *)&ptr[12], 247, wwn);
+
+        ptr[3] = 8 + len + 1;
+        used += ptr[3] + 4;
+        ptr += used;
+
+        /* 2/3: NAA binary */
+        ptr[0] = 1; /* code set: binary */
+        ptr[1] = 3; /* identifier: NAA */
+        ptr[3] = 16; /* body length for naa registered extended format */
+
+        /*
+         * Set type 6 and use OpenFabrics IEEE Company ID: 00 14 05
+         */
+        ptr[4] = 0x60;
+        ptr[5] = 0x01;
+        ptr[6] = 0x40;
+        ptr[7] = 0x50;
+
+        /*
+         * Fill in the rest with a binary representation of WWN
+         *
+         * This implementation only uses a nibble out of every byte of
+         * WWN, but this is what the kernel does, and it's nice for our
+         * values to match.
+         *
+         * The first hex digit lands in the low nibble of ptr[7], whose
+         * high nibble already holds the end of the company id.
+         */
+        next = true;
+        for (p = wwn, i = 7; *p && i < 20; p++) {
+            uint8_t val;
+
+            if (!char_to_hex(&val, *p)) {
+                continue;
+            }
+
+            if (next) {
+                next = false;
+                ptr[i++] |= val;
+            } else {
+                next = true;
+                ptr[i] = val << 4;
+            }
+        }
+
+        used += 20;
+        ptr += 20;
+
+        /* 3/3: Vendor specific */
+        ptr[0] = 2; /* code set: ASCII */
+        ptr[1] = 0; /* identifier: vendor-specific */
+
+        /*
+         * Room left for the string: the buffer minus the page header (4),
+         * the descriptors written so far (used) and this descriptor's own
+         * header (4).  Also keep string + NUL within the one-byte
+         * designator length.
+         */
+        cap = sizeof(data) - 4 - used - 4;
+        if (cap > 255) {
+            cap = 255;
+        }
+        len = tcmu_vpd_put_string((char *)&ptr[4], cap,
+                                  tcmu_dev_get_cfgstring(dev));
+        ptr[3] = len + 1;
+
+        used += ptr[3] + 4;
+
+        /* Done with descriptor list */
+
+        /* PAGE LENGTH, big-endian, stored bytewise to avoid aliasing */
+        tot_len = htobe16(used);
+        memcpy(&data[2], &tot_len, 2);
+
+        tcmu_memcpy_into_iovec(iovec, iov_cnt, data, used + 4);
+
+        free(wwn);
+
+        return TCMU_STS_OK;
+    }
+    case 0xb0: /* Block Limits */
+    {
+        uint8_t data[64] = { 0 };
+        uint32_t max_xfer_length;
+        uint16_t val16;
+        uint32_t val32;
+
+        data[1] = 0xb0;
+
+        /* PAGE LENGTH (003Ch) */
+        val16 = htobe16(0x3c);
+        memcpy(&data[2], &val16, 2);
+
+        /* WSNZ = 1: the device server won't support a value of zero
+         * in the NUMBER OF LOGICAL BLOCKS field in the WRITE SAME
+         * command CDBs
+         */
+        data[4] = 0x01;
+
+        /*
+         * Daemons like runner may override the user requested
+         * value due to device specific limits.
+         */
+        max_xfer_length = tcmu_dev_get_max_xfer_len(dev);
+
+        val32 = htobe32(max_xfer_length);
+        /* Max xfer length */
+        memcpy(&data[8], &val32, 4);
+        /* Optimal xfer length */
+        memcpy(&data[12], &val32, 4);
+
+        tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data));
+
+        return TCMU_STS_OK;
+    }
+    case 0xb1: /* Block Device Characteristics VPD page */
+    {
+        uint8_t data[64] = { 0 };
+        uint16_t val16;
+
+        /*
+         * From spc-5 Revision 14, section 6.7.2 Standard INQUIRY data
+         * set the device type to Direct access block device.
+         */
+        data[0] = 0x00;
+
+        /* PAGE CODE (B1h) */
+        data[1] = 0xb1;
+
+        /* PAGE LENGTH (003Ch)*/
+        val16 = htobe16(0x003c);
+        memcpy(&data[2], &val16, 2);
+
+        /* Bytes 4-5 are set to 0001h when the device is solid-state. */
+        if (tcmu_dev_get_solid_state_media(dev)) {
+            val16 = htobe16(0x0001);
+            memcpy(&data[4], &val16, 2);
+        }
+
+        tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data));
+        return TCMU_STS_OK;
+    }
+    case 0xb2: /* Logical Block Provisioning VPD page */
+    {
+        uint8_t data[64] = { 0 };
+        uint16_t val16;
+
+        /*
+         * From spc-5 Revision 14, section 6.7.2 Standard INQUIRY data
+         * set the device type to Direct access block device.
+         */
+        data[0] = 0x00;
+
+        /* PAGE CODE (B2h) */
+        data[1] = 0xb2;
+
+        /*
+         * PAGE LENGTH field: PROVISIONING GROUP DESCRIPTOR field will be
+         * not present.
+         */
+        val16 = htobe16(0x0004);
+        memcpy(&data[2], &val16, 2);
+
+        /*
+         * The logical block provisioning read zeros (LBPRZ) field.
+         *
+         * The logical block data represented by unmapped LBAs is set to
+         * zeros
+         */
+        data[5] = 0x04;
+
+        tcmu_memcpy_into_iovec(iovec, iov_cnt, data, sizeof(data));
+        return TCMU_STS_OK;
+    }
+    default:
+        /* error_report() terminates the line itself: no trailing newline. */
+        error_report("Vital product data page code 0x%x not support",
+                     cdb[2]);
+        return TCMU_STS_INVALID_CDB;
+    }
+}
+
+/*
+ * Emulate INQUIRY(0x12)
+ *
+ * Dispatches on the EVPD bit (bit 0 of cdb[1]): when it is set the vital
+ * product data page named by cdb[2] is returned, otherwise the standard
+ * INQUIRY data is returned, which requires cdb[2] (the page code) to be 0.
+ *
+ * Returns the status of the selected emulation, or TCMU_STS_INVALID_CDB
+ * when a page code is given without EVPD.
+ */
+int tcmu_emulate_inquiry(
+    struct tcmu_device *dev,
+    uint8_t *cdb,
+    struct iovec *iovec,
+    size_t iov_cnt)
+{
+    bool evpd = cdb[1] & 0x01;
+
+    if (evpd) {
+        return tcmu_emulate_evpd_inquiry(dev, cdb, iovec, iov_cnt);
+    }
+
+    if (cdb[2]) {
+        return TCMU_STS_INVALID_CDB;
+    }
+
+    return tcmu_emulate_std_inquiry(cdb, iovec, iov_cnt);
+}
+
+/*
+ * Emulate TEST UNIT READY: the unit is always reported as ready.
+ *
+ * None of the arguments are examined.  Always returns TCMU_STS_OK.
+ */
+int tcmu_emulate_test_unit_ready(
+    uint8_t *cdb,
+    struct iovec *iovec,
+    size_t iov_cnt)
+{
+    int status = TCMU_STS_OK;
+
+    return status;
+}
+
+/*
+ * Emulate READ CAPACITY(10).
+ *
+ * The 8-byte response holds the big-endian LBA of the last logical block
+ * followed by the big-endian block size.
+ *
+ * @num_lbas:   number of logical blocks on the device.
+ * @block_size: logical block size in bytes.
+ * @cdb:        the CDB; not examined here.
+ * @iovec:      destination scatter list.
+ * @iov_cnt:    number of entries in @iovec.
+ *
+ * Always returns TCMU_STS_OK.
+ */
+int tcmu_emulate_read_capacity_10(
+    uint64_t num_lbas,
+    uint32_t block_size,
+    uint8_t *cdb,
+    struct iovec *iovec,
+    size_t iov_cnt)
+{
+    uint8_t resp[8] = { 0 };
+    uint32_t last_lba_be;
+    uint32_t block_size_be = htobe32(block_size);
+
+    /*
+     * Report the LBA of the last block (hence the "- 1").  A device too
+     * large for 32 bits reports FFFFFFFFh, which tells the initiator to
+     * use READ CAPACITY(16) instead.
+     */
+    last_lba_be = (num_lbas < 0x100000000ULL) ? htobe32(num_lbas - 1)
+                                              : 0xffffffff;
+
+    memcpy(&resp[0], &last_lba_be, 4);
+    memcpy(&resp[4], &block_size_be, 4);
+
+    tcmu_memcpy_into_iovec(iovec, iov_cnt, resp, sizeof(resp));
+
+    return TCMU_STS_OK;
+}
+
+/*
+ * Emulate READ CAPACITY(16).
+ *
+ * The 32-byte response holds the big-endian 64-bit LBA of the last logical
+ * block, the big-endian block size, and the LBPME and LBPRZ bits in byte
+ * 14; everything else is zero.
+ *
+ * @num_lbas:   number of logical blocks on the device.
+ * @block_size: logical block size in bytes.
+ * @cdb:        the CDB; not examined here.
+ * @iovec:      destination scatter list.
+ * @iov_cnt:    number of entries in @iovec.
+ *
+ * Always returns TCMU_STS_OK.
+ */
+int tcmu_emulate_read_capacity_16(
+    uint64_t num_lbas,
+    uint32_t block_size,
+    uint8_t *cdb,
+    struct iovec *iovec,
+    size_t iov_cnt)
+{
+    uint8_t resp[32] = { 0 };
+    /* LBA of the last logical block, hence the "- 1". */
+    uint64_t last_lba_be = htobe64(num_lbas - 1);
+    uint32_t block_size_be = htobe32(block_size);
+
+    memcpy(&resp[0], &last_lba_be, 8);
+    memcpy(&resp[8], &block_size_be, 4);
+
+    /*
+     * 0x80: Logical Block Provisioning Management Enabled (LBPME) -- the
+     *       logical unit implements logical block provisioning management.
+     * 0x40: Logical Block Provisioning Read Zeros (LBPRZ) -- shall be one
+     *       if the LBPRZ field is set to xx1b in VPD page B2h, and zero
+     *       otherwise.
+     */
+    resp[14] = 0x80 | 0x40;
+
+    tcmu_memcpy_into_iovec(iovec, iov_cnt, resp, sizeof(resp));
+
+    return TCMU_STS_OK;
+}
+
+/*
+ * Copy as much of @from_buf as fits into @to_buf.
+ *
+ * SPC 4r37: 4.3.5.6 Allocation length:
+ *
+ * The device server shall terminate transfers to the Data-In Buffer
+ * when the number of bytes or blocks specified by the ALLOCATION
+ * LENGTH field have been transferred or when all available data
+ * have been transferred, whichever is less.
+ *
+ * @to_buf:   destination; when NULL nothing is copied.
+ * @to_len:   space available in @to_buf.
+ * @from_buf: source data.
+ * @from_len: number of bytes available in @from_buf.
+ */
+static void copy_to_response_buf(uint8_t *to_buf, size_t to_len,
+                                 uint8_t *from_buf, size_t from_len)
+{
+    size_t count = from_len < to_len ? from_len : to_len;
+
+    if (to_buf) {
+        memcpy(to_buf, from_buf, count);
+    }
+}
+
+/*
+ * Produce mode page 01h (12 bytes: page code 01h, page length 0Ah, all
+ * parameters zero).
+ *
+ * @dev:         device being queried; not examined here.
+ * @ret_buf:     destination, or NULL to only obtain the page size.
+ * @ret_buf_len: space available in @ret_buf; the copy is truncated to it.
+ *
+ * Returns the full size of the page, 12.
+ */
+static int handle_rwrecovery_page(struct tcmu_device *dev, uint8_t *ret_buf,
+                                  size_t ret_buf_len)
+{
+    uint8_t page[12] = {
+        [0] = 0x1, /* page code */
+        [1] = 0xa, /* page length */
+    };
+
+    copy_to_response_buf(ret_buf, ret_buf_len, page, sizeof(page));
+    return sizeof(page);
+}
+
+/*
+ * Produce mode page 08h (20 bytes: page code 08h, page length 12h).
+ *
+ * @dev:         device being queried; consulted for its write cache setting.
+ * @ret_buf:     destination, or NULL to only obtain the page size.
+ * @ret_buf_len: space available in @ret_buf; the copy is truncated to it.
+ *
+ * Returns the full size of the page, 20.
+ */
+static int handle_cache_page(struct tcmu_device *dev, uint8_t *ret_buf,
+                             size_t ret_buf_len)
+{
+    uint8_t page[20] = {
+        [0] = 0x8,  /* page code */
+        [1] = 0x12, /* page length */
+    };
+
+    /*
+     * If device supports a writeback cache then set writeback
+     * cache enable (WCE)
+     */
+    if (tcmu_dev_get_write_cache_enabled(dev)) {
+        page[2] = 0x4;
+    }
+
+    copy_to_response_buf(ret_buf, ret_buf_len, page, sizeof(page));
+    return sizeof(page);
+}
+
+/*
+ * Produce the Control mode page, 0Ah (12 bytes).  See spc4r31, section
+ * 7.5.7 "Control mode page" for the fields set below.
+ *
+ * @dev:         device being queried; not examined here.
+ * @ret_buf:     destination, or NULL to only obtain the page size.
+ * @ret_buf_len: space available in @ret_buf; the copy is truncated to it.
+ *
+ * Returns the full size of the page, 12.
+ */
+static int handle_control_page(struct tcmu_device *dev, uint8_t *ret_buf,
+                               size_t ret_buf_len)
+{
+    uint8_t page[12] = {
+        [0] = 0x0a, /* page code */
+        [1] = 0x0a, /* page length */
+
+        /*
+         * GLTSD = 1: because we don't implicitly save log parameters
+         *
+         * A global logging target save disable (GLTSD) bit set to
+         * zero specifies that the logical unit implicitly saves, at
+         * vendor specific intervals, each log parameter in which the
+         * TSD bit (see 7.3) is set to zero. A GLTSD bit set to one
+         * specifies that the logical unit shall not implicitly save
+         * any log parameters.
+         */
+        [2] = 0x02,
+
+        /*
+         * TAS = 1: Currently not settable by tcmu. Using the LIO default
+         *
+         * A task aborted status (TAS) bit set to zero specifies that
+         * aborted commands shall be terminated by the device server
+         * without any response to the application client. A TAS bit
+         * set to one specifies that commands aborted by the actions
+         * of an I_T nexus other than the I_T nexus on which the command
+         * was received shall be completed with TASK ABORTED status
+         */
+        [5] = 0x40,
+
+        /*
+         * BUSY TIMEOUT PERIOD: Currently is unlimited
+         *
+         * The BUSY TIMEOUT PERIOD field specifies the maximum time, in
+         * 100 milliseconds increments, that the application client allows
+         * for the device server to return BUSY status for unanticipated
+         * conditions that are not a routine part of commands from the
+         * application client. This value may be rounded down as defined
+         * in 5.4(the Parameter rounding section).
+         *
+         * A 0000h value in this field is undefined by this standard.
+         * An FFFFh value in this field is defined as an unlimited period.
+         */
+        [8] = 0xff,
+        [9] = 0xff,
+    };
+
+    copy_to_response_buf(ret_buf, ret_buf_len, page, sizeof(page));
+    return sizeof(page);
+}
+
+
+/*
+ * Table of the mode pages this helper can produce.  Each entry names a
+ * page/subpage pair and the function that generates it: @get copies up to
+ * @buf_len bytes of the page into @buf and returns the page's full size.
+ */
+static struct mode_sense_handler {
+    uint8_t page;
+    uint8_t subpage;
+    int (*get)(struct tcmu_device *dev, uint8_t *buf, size_t buf_len);
+} modesense_handlers[] = {
+    { .page = 0x1, .subpage = 0, .get = handle_rwrecovery_page },
+    { .page = 0x8, .subpage = 0, .get = handle_cache_page },
+    { .page = 0xa, .subpage = 0, .get = handle_control_page },
+};
+
+/*
+ * Append one mode page to a MODE SENSE response under construction.
+ *
+ * @dev:       device being queried.
+ * @handler:   table entry whose page is to be generated.
+ * @buf:       in/out write cursor into the response; advanced past the
+ *             page, or set to NULL once the response buffer is exhausted.
+ * @alloc_len: total size of the response buffer.
+ * @used_len:  in/out number of response bytes accounted for so far.
+ * @sense_ten: true for MODE SENSE(10), false for MODE SENSE(6).
+ *
+ * Returns the size of the page, or -EINVAL when a MODE SENSE(6) response
+ * would reach 255 bytes.
+ */
+static ssize_t handle_mode_sense(struct tcmu_device *dev,
+                                 struct mode_sense_handler *handler,
+                                 uint8_t **buf, size_t alloc_len,
+                                 size_t *used_len, bool sense_ten)
+{
+    int page_len = handler->get(dev, *buf, alloc_len - *used_len);
+    size_t total = *used_len + page_len;
+
+    if (!sense_ten && total >= 255) {
+        return -EINVAL;
+    }
+
+    /*
+     * SPC 4r37: 4.3.5.6 Allocation length:
+     *
+     * If the information being transferred to the Data-In Buffer includes
+     * fields containing counts of the number of bytes in some or all of
+     * the data (e.g., the PARAMETER DATA LENGTH field, the PAGE LENGTH
+     * field, the DESCRIPTOR LENGTH field, the AVAILABLE DATA field),
+     * then the contents of these fields shall not be altered to reflect
+     * the truncation, if any, that results from an insufficient
+     * ALLOCATION LENGTH value
+     *
+     * So once the buffer is full, drop the cursor but keep counting: the
+     * caller still loops over the remaining handlers and ends up with the
+     * untruncated total in *used_len.
+     */
+    *used_len = total;
+    if (*buf) {
+        if (total >= alloc_len) {
+            *buf = NULL;
+        } else {
+            *buf += page_len;
+        }
+    }
+
+    return page_len;
+}
+
+/*
+ * Handle MODE_SENSE(6) and MODE_SENSE(10).
+ *
+ * For TYPE_DISK only.
+ *
+ * Builds the mode parameter header followed by the requested mode page
+ * (or every supported page for page code 3Fh) and copies the result into
+ * the caller's iovec.  No block descriptors and no device-specific
+ * parameter are reported.
+ *
+ * @dev:     device being queried.
+ * @cdb:     the MODE SENSE CDB.
+ * @iovec:   destination scatter list.
+ * @iov_cnt: number of entries in @iovec.
+ *
+ * Returns TCMU_STS_OK on success (including an allocation length of zero),
+ * TCMU_STS_NO_RESOURCE when the response buffer cannot be allocated, and
+ * TCMU_STS_INVALID_CDB when the allocation length cannot hold the header,
+ * the page is not supported, or a MODE SENSE(6) response would be too long.
+ */
+int tcmu_emulate_mode_sense(
+    struct tcmu_device *dev,
+    uint8_t *cdb,
+    struct iovec *iovec,
+    size_t iov_cnt)
+{
+    bool sense_ten = (cdb[0] == MODE_SENSE_10);
+    uint8_t page_code = cdb[2] & 0x3f;
+    uint8_t subpage_code = cdb[3];
+    /*
+     * ALLOCATION LENGTH comes from the CDB: byte 4 of MODE SENSE(6),
+     * big-endian bytes 7-8 of MODE SENSE(10).  It is a byte count and is
+     * unrelated to the device's maximum transfer length, which is expressed
+     * in blocks.
+     */
+    size_t alloc_len = sense_ten ? (((size_t)cdb[7] << 8) | cdb[8])
+                                 : cdb[4];
+    int i;
+    int ret = 0;
+    size_t used_len;
+    uint8_t *buf;
+    uint8_t *orig_buf;
+
+    if (!alloc_len) {
+        return TCMU_STS_OK;
+    }
+
+    /* Mode parameter header. Mode data length filled in at the end. */
+    used_len = sense_ten ? 8 : 4;
+    if (used_len > alloc_len) {
+        return TCMU_STS_INVALID_CDB;
+    }
+
+    orig_buf = calloc(1, alloc_len);
+    if (!orig_buf) {
+        return TCMU_STS_NO_RESOURCE;
+    }
+    buf = orig_buf + used_len;
+
+    /* Don't fill in device-specific parameter */
+    /* This helper fn doesn't support sw write protect (SWP) */
+
+    /* Don't report block descriptors */
+
+    if (page_code == 0x3f) {
+        /* Return all pages. */
+        for (i = 0; i < ARRAY_SIZE(modesense_handlers); i++) {
+            ret = handle_mode_sense(dev, &modesense_handlers[i],
+                                    &buf, alloc_len, &used_len,
+                                    sense_ten);
+            if (ret < 0) {
+                goto free_buf;
+            }
+        }
+    } else {
+        for (i = 0; i < ARRAY_SIZE(modesense_handlers); i++) {
+            if (page_code == modesense_handlers[i].page &&
+                subpage_code == modesense_handlers[i].subpage) {
+                ret = handle_mode_sense(dev,
+                                        &modesense_handlers[i],
+                                        &buf, alloc_len,
+                                        &used_len, sense_ten);
+                break;
+            }
+        }
+
+        /* ret is still 0 when no handler matched the requested page. */
+        if (ret <= 0) {
+            goto free_buf;
+        }
+    }
+
+    /*
+     * MODE DATA LENGTH does not count itself: two bytes for MODE SENSE(10),
+     * one byte for MODE SENSE(6).  It reflects the untruncated length.
+     */
+    if (sense_ten) {
+        uint16_t mode_data_len = htobe16(used_len - 2);
+
+        memcpy(orig_buf, &mode_data_len, sizeof(mode_data_len));
+    } else {
+        orig_buf[0] = used_len - 1;
+    }
+
+    tcmu_memcpy_into_iovec(iovec, iov_cnt, orig_buf, alloc_len);
+    free(orig_buf);
+    return TCMU_STS_OK;
+
+free_buf:
+    free(orig_buf);
+    return TCMU_STS_INVALID_CDB;
+}
+
+/*
+ * Handle MODE_SELECT(6) and MODE_SELECT(10).
+ *
+ * For TYPE_DISK only.
+ *
+ * Nothing can actually be changed: the command succeeds only when the page
+ * sent by the initiator is identical to what MODE SENSE would return for
+ * the page/subpage taken from cdb[2]/cdb[3].
+ *
+ * @dev:     device the command is addressed to.
+ * @cdb:     the MODE SELECT CDB.
+ * @iovec:   scatter list holding the parameter list sent by the initiator.
+ * @iov_cnt: number of entries in @iovec.
+ *
+ * Returns TCMU_STS_OK on success (including a parameter list length of
+ * zero), TCMU_STS_INVALID_PARAM_LIST_LEN when the parameter list is too
+ * long or too short, TCMU_STS_INVALID_CDB for PF=0, SP=1 or an unsupported
+ * page, and TCMU_STS_INVALID_PARAM_LIST when the page contents differ.
+ */
+int tcmu_emulate_mode_select(
+    struct tcmu_device *dev,
+    uint8_t *cdb,
+    struct iovec *iovec,
+    size_t iov_cnt)
+{
+    bool select_ten = (cdb[0] == MODE_SELECT_10);
+    uint8_t page_code = cdb[2] & 0x3f;
+    uint8_t subpage_code = cdb[3];
+    /*
+     * PARAMETER LIST LENGTH comes from the CDB: byte 4 of MODE SELECT(6),
+     * big-endian bytes 7-8 of MODE SELECT(10).  It is a byte count and is
+     * unrelated to the device's maximum transfer length.
+     */
+    size_t alloc_len = select_ten ? (((size_t)cdb[7] << 8) | cdb[8])
+                                  : cdb[4];
+    int i;
+    int ret = 0;
+    size_t hdr_len = select_ten ? 8 : 4;
+    uint8_t buf[512];
+    /* Zeroed so the comparison below never reads indeterminate bytes. */
+    uint8_t in_buf[512] = { 0 };
+    bool got_sense = false;
+
+    if (!alloc_len) {
+        return TCMU_STS_OK;
+    }
+
+    if (tcmu_memcpy_from_iovec(in_buf, sizeof(in_buf), iovec, iov_cnt) >=
+        sizeof(in_buf)) {
+        return TCMU_STS_INVALID_PARAM_LIST_LEN;
+    }
+
+    /* Abort if !pf or sp */
+    if (!(cdb[1] & 0x10) || (cdb[1] & 0x01)) {
+        return TCMU_STS_INVALID_CDB;
+    }
+
+    /* Generate the page as MODE SENSE would report it. */
+    memset(buf, 0, sizeof(buf));
+    for (i = 0; i < ARRAY_SIZE(modesense_handlers); i++) {
+        if (page_code == modesense_handlers[i].page
+            && subpage_code == modesense_handlers[i].subpage) {
+            ret = modesense_handlers[i].get(dev, &buf[hdr_len],
+                                            sizeof(buf) - hdr_len);
+            if (ret <= 0) {
+                return TCMU_STS_INVALID_CDB;
+            }
+
+            if (!select_ten && (hdr_len + ret >= 255)) {
+                return TCMU_STS_INVALID_CDB;
+            }
+
+            got_sense = true;
+            break;
+        }
+    }
+
+    if (!got_sense) {
+        return TCMU_STS_INVALID_CDB;
+    }
+
+    /* The initiator must have sent at least the header plus the page. */
+    if (alloc_len < (hdr_len + ret)) {
+        return TCMU_STS_INVALID_PARAM_LIST_LEN;
+    }
+
+    /* Verify what was selected is identical to what sense returns, since we
+       don't support actually setting anything. */
+    if (memcmp(&buf[hdr_len], &in_buf[hdr_len], ret)) {
+        return TCMU_STS_INVALID_PARAM_LIST;
+    }
+
+    return TCMU_STS_OK;
+}
+
+/*
+ * Emulate START STOP UNIT.
+ *
+ * Only a plain "start" request is accepted: the upper four bits of cdb[4]
+ * must be zero and bit 0 of cdb[4] must be set.
+ *
+ * @dev: device the command is addressed to; not examined here.
+ * @cdb: the START STOP UNIT CDB.
+ *
+ * Returns TCMU_STS_OK for an accepted request, TCMU_STS_INVALID_CDB
+ * otherwise.
+ */
+int tcmu_emulate_start_stop(struct tcmu_device *dev, uint8_t *cdb)
+{
+    uint8_t upper_nibble = (cdb[4] >> 4) & 0xf;
+    bool start = cdb[4] & 0x01;
+
+    /* Currently, we don't allow ejecting the medium, so we're
+     * ignoring the FBO_PREV_EJECT flag, but it may turn out that
+     * initiators do not handle this well, so we may have to change
+     * this behavior.
+     */
+    if (upper_nibble || !start) {
+        return TCMU_STS_INVALID_CDB;
+    }
+
+    return TCMU_STS_OK;
+}
diff --git a/tcmu/helper.h b/tcmu/helper.h
new file mode 100644
index 0000000..bbbc2be
--- /dev/null
+++ b/tcmu/helper.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2014 Red Hat, Inc.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 2.1 or any later version (LGPLv2.1 or
+ * later), or the Apache License 2.0.
+ */
+
+/*
+ * APIs for both libtcmu users and tcmu-runner plugins to use.
+ */
+
+#ifndef __TCMU_HELPER_H
+#define __TCMU_HELPER_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/*
+ * Forward declarations so that the prototypes below do not declare these
+ * structures at prototype scope when this header is included before the
+ * headers that define them.
+ */
+struct iovec;
+struct tcmu_device;
+
+/*
+ * Basic implementations of mandatory SCSI commands.
+ *
+ * In every helper, @cdb is the SCSI command descriptor block and
+ * @iovec/@iov_cnt describe the command's data buffer.  The helpers return
+ * a status code (TCMU_STS_* values for the implementations visible in
+ * helper.c).
+ */
+int tcmu_emulate_inquiry(struct tcmu_device *dev, uint8_t *cdb,
+                         struct iovec *iovec, size_t iov_cnt);
+int tcmu_emulate_start_stop(struct tcmu_device *dev, uint8_t *cdb);
+int tcmu_emulate_test_unit_ready(uint8_t *cdb, struct iovec *iovec,
+                                 size_t iov_cnt);
+int tcmu_emulate_read_capacity_10(uint64_t num_lbas, uint32_t block_size,
+                                  uint8_t *cdb,
+                                  struct iovec *iovec, size_t iov_cnt);
+int tcmu_emulate_read_capacity_16(uint64_t num_lbas, uint32_t block_size,
+                                  uint8_t *cdb,
+                                  struct iovec *iovec, size_t iov_cnt);
+int tcmu_emulate_mode_sense(struct tcmu_device *dev, uint8_t *cdb,
+                            struct iovec *iovec, size_t iov_cnt);
+int tcmu_emulate_mode_select(struct tcmu_device *dev, uint8_t *cdb,
+                             struct iovec *iovec, size_t iov_cnt);
+
+#endif
diff --git a/tcmu/tcmu.c b/tcmu/tcmu.c
new file mode 100644
index 0000000..70b9a91
--- /dev/null
+++ b/tcmu/tcmu.c
@@ -0,0 +1,598 @@
+/*
+ * A TCMU userspace handler for QEMU block drivers.
+ *
+ * Copyright (C) 2016 Red Hat, Inc.
+ *
+ * Authors:
+ * Fam Zheng <address@hidden>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "libtcmu.h"
+#include "helper.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+#include "sysemu/block-backend.h"
+#include "sysemu/blockdev.h"
+#include "block/aio.h"
+#include "block/qdict.h"
+#include "scsi/constants.h"
+#include "tcmu/tcmu.h"
+#include "qemu/main-loop.h"
+#include "qemu/option.h"
+#include "qapi/qapi-commands.h"
+#include "qapi/qmp/qstring.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/error.h"
+
+#include "qemu/compiler.h"
+#include "trace.h"
+
+typedef struct TCMUExport TCMUExport;
+
+/* One block backend exported through TCMU. */
+struct TCMUExport {
+ /* Block backend served by this export; referenced in tcmu_export_new(). */
+ BlockBackend *blk;
+ /* TCMU device bound to this export; assigned in qemu_tcmu_added(). */
+ struct tcmu_device *tcmu_dev;
+ /* Writability flag recorded by tcmu_export_new(). */
+ bool writable;
+ /* Link in the global tcmu_exports list. */
+ QLIST_ENTRY(TCMUExport) next;
+};
+
+/* State of the running TCMU handler (allocated by qemu_tcmu_start()). */
+typedef struct {
+ struct tcmulib_context *tcmulib_ctx;
+} TCMUHandlerState;
+
+/* All exports created by tcmu_export_new(). */
+static QLIST_HEAD(, TCMUExport) tcmu_exports =
+ QLIST_HEAD_INITIALIZER(tcmu_exports);
+
+/* NULL while the handler is not started. */
+static TCMUHandlerState *handler_state;
+
+/*
+ * Temporary definition; to be replaced by the shared sense code
+ * definitions in scsi/utils.c.
+ */
+#define ASCQ_INVALID_FIELD_IN_CDB 0x2400
+
+/* Per-command context handed to qemu_tcmu_aio_cb() for async requests. */
+typedef struct {
+ /* Command to complete once the block request finishes. */
+ struct tcmulib_cmd *cmd;
+ /* Export the command was received on. */
+ TCMUExport *exp;
+ /* I/O vector wrapper for reads/writes; NULL for flush requests. */
+ QEMUIOVector *qiov;
+} TCMURequest;
+
+/*
+ * Completion callback for the asynchronous block requests issued by
+ * qemu_tcmu_handle_cmd().
+ *
+ * @opaque is the TCMURequest created by qemu_tcmu_req_new(); @ret is the
+ * block layer result.  A zero result completes the command with GOOD, any
+ * other value with CHECK_CONDITION.  The request and its QEMUIOVector
+ * wrapper are released here.
+ */
+static void qemu_tcmu_aio_cb(void *opaque, int ret)
+{
+    TCMURequest *request = opaque;
+    struct tcmu_device *dev;
+    int scsi_status = GOOD;
+
+    trace_qemu_tcmu_aio_cb();
+
+    if (ret) {
+        scsi_status = CHECK_CONDITION;
+    }
+
+    dev = request->exp->tcmu_dev;
+    tcmulib_command_complete(dev, request->cmd, scsi_status);
+    tcmulib_processing_complete(dev);
+
+    g_free(request->qiov);
+    g_free(request);
+}
+
+/*
+ * Allocate a TCMURequest tying @cmd to the export it arrived on.
+ *
+ * @qiov may be NULL (flush requests carry no data).  The returned request
+ * is freed by qemu_tcmu_aio_cb().
+ */
+static inline TCMURequest *qemu_tcmu_req_new(TCMUExport *exp,
+                                             struct tcmulib_cmd *cmd,
+                                             QEMUIOVector *qiov)
+{
+    TCMURequest *request = g_new(TCMURequest, 1);
+
+    request->cmd = cmd;
+    request->exp = exp;
+    request->qiov = qiov;
+
+    return request;
+}
+
+/*
+ * Dispatch one SCSI command received on @exp.
+ *
+ * Simple commands are emulated synchronously through the tcmu_emulate_*()
+ * helpers.  Flush, read and write requests are submitted to the block
+ * layer and completed later from qemu_tcmu_aio_cb().
+ *
+ * Returns TCMU_STS_ASYNC_HANDLED when completion is deferred to the AIO
+ * callback, TCMU_STS_NOT_HANDLED for unsupported commands, or the status
+ * produced by the emulation helper otherwise.
+ */
+static int qemu_tcmu_handle_cmd(TCMUExport *exp, struct tcmulib_cmd *cmd)
+{
+    uint8_t *cdb = cmd->cdb;
+    uint64_t offset;
+    QEMUIOVector *qiov;
+
+    trace_qemu_tcmu_handle_cmd(cdb[0]);
+    switch (cdb[0]) {
+    case INQUIRY:
+        return tcmu_emulate_inquiry(exp->tcmu_dev, cdb,
+                                    cmd->iovec, cmd->iov_cnt);
+    case TEST_UNIT_READY:
+        return tcmu_emulate_test_unit_ready(cdb, cmd->iovec, cmd->iov_cnt);
+    case SERVICE_ACTION_IN_16:
+        if (cdb[1] != SAI_READ_CAPACITY_16) {
+            return TCMU_STS_NOT_HANDLED;
+        }
+        /*
+         * NOTE(review): a negative blk_getlength() result (error) is not
+         * handled here -- confirm which status should be reported.
+         */
+        return tcmu_emulate_read_capacity_16(blk_getlength(exp->blk) / 512,
+                                             512,
+                                             cmd->cdb, cmd->iovec,
+                                             cmd->iov_cnt);
+    case MODE_SENSE:
+    case MODE_SENSE_10:
+        return tcmu_emulate_mode_sense(exp->tcmu_dev, cdb, cmd->iovec,
+                                       cmd->iov_cnt);
+    case MODE_SELECT:
+    case MODE_SELECT_10:
+        return tcmu_emulate_mode_select(exp->tcmu_dev, cdb, cmd->iovec,
+                                        cmd->iov_cnt);
+    case SYNCHRONIZE_CACHE:
+    case SYNCHRONIZE_CACHE_16:
+        /* Requests with bit 1 of byte 1 set are rejected. */
+        if (cdb[1] & 0x2) {
+            return tcmu_sense_set_data(cmd->sense_buf, ILLEGAL_REQUEST,
+                                       ASCQ_INVALID_FIELD_IN_CDB);
+        }
+        blk_aio_flush(exp->blk, qemu_tcmu_aio_cb,
+                      qemu_tcmu_req_new(exp, cmd, NULL));
+        return TCMU_STS_ASYNC_HANDLED;
+
+    case READ_6:
+    case READ_10:
+    case READ_12:
+    case READ_16:
+        /*
+         * The LBA is decoded only for commands that actually carry one.
+         * TODO: block size?
+         */
+        offset = tcmu_cdb_get_lba(cdb) << BDRV_SECTOR_BITS;
+        qiov = g_new(QEMUIOVector, 1);
+        qemu_iovec_init_external(qiov, cmd->iovec, cmd->iov_cnt);
+        trace_qemu_tcmu_handle_cmd_read(offset);
+        blk_aio_preadv(exp->blk, offset, qiov, 0, qemu_tcmu_aio_cb,
+                       qemu_tcmu_req_new(exp, cmd, qiov));
+        return TCMU_STS_ASYNC_HANDLED;
+
+    case WRITE_6:
+    case WRITE_10:
+    case WRITE_12:
+    case WRITE_16:
+        /* TODO: block size? */
+        offset = tcmu_cdb_get_lba(cdb) << BDRV_SECTOR_BITS;
+        qiov = g_new(QEMUIOVector, 1);
+        qemu_iovec_init_external(qiov, cmd->iovec, cmd->iov_cnt);
+        trace_qemu_tcmu_handle_cmd_write(offset);
+        blk_aio_pwritev(exp->blk, offset, qiov, 0, qemu_tcmu_aio_cb,
+                        qemu_tcmu_req_new(exp, cmd, qiov));
+        return TCMU_STS_ASYNC_HANDLED;
+
+    default:
+        trace_qemu_tcmu_handle_cmd_unknown_cmd(cdb[0]);
+        return TCMU_STS_NOT_HANDLED;
+    }
+}
+
+/*
+ * File descriptor read handler for a TCMU device.
+ *
+ * @opaque is the TCMUExport registered in qemu_tcmu_added().  Every
+ * pending command is fetched and dispatched; commands that were not handed
+ * off to the block layer are completed immediately with the status
+ * returned by qemu_tcmu_handle_cmd().
+ */
+static void qemu_tcmu_dev_event_handler(void *opaque)
+{
+    TCMUExport *exp = opaque;
+    struct tcmu_device *tdev = exp->tcmu_dev;
+
+    tcmulib_processing_start(tdev);
+
+    for (;;) {
+        struct tcmulib_cmd *pending = tcmulib_get_next_command(tdev);
+        int status;
+
+        if (pending == NULL) {
+            break;
+        }
+
+        status = qemu_tcmu_handle_cmd(exp, pending);
+        if (status == TCMU_STS_ASYNC_HANDLED) {
+            /* Completed later from qemu_tcmu_aio_cb(). */
+            continue;
+        }
+        tcmulib_command_complete(tdev, pending, status);
+    }
+
+    tcmulib_processing_complete(tdev);
+}
+
+/*
+ * Find the export that serves @blk.
+ *
+ * Returns the matching entry of tcmu_exports, or NULL when no export
+ * refers to @blk.
+ */
+static TCMUExport *tcmu_export_lookup(const BlockBackend *blk)
+{
+    TCMUExport *candidate;
+    TCMUExport *match = NULL;
+
+    QLIST_FOREACH(candidate, &tcmu_exports, next) {
+        if (candidate->blk == blk) {
+            match = candidate;
+            break;
+        }
+    }
+
+    return match;
+}
+/*
+ * Forward declarations: both functions are defined after
+ * qemu_tcmu_handler, whose subtype field they read, but are called from
+ * the handler callbacks defined before it.
+ */
+static TCMUExport *parse_cfgstr(const char *cfgstr,
+ Error **errp);
+static bool check_cfgstr(const char *cfgstr,
+ Error **errp);
+
+/*
+ * Options consumed by export_init_func() itself.  Everything not listed
+ * here stays in the option dictionary handed to blk_new_open().
+ *
+ * BDRV_OPT_READ_ONLY must be listed: export_init_func() reads it from
+ * these options, and an option that is not described here is never
+ * absorbed from the user's settings.
+ */
+QemuOptsList qemu_tcmu_common_export_opts = {
+    .name = "export",
+    .head = QTAILQ_HEAD_INITIALIZER(qemu_tcmu_common_export_opts.head),
+    .desc = {
+        {
+            .name = "snapshot",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable snapshot mode",
+        },{
+            .name = "aio",
+            .type = QEMU_OPT_STRING,
+            .help = "host AIO implementation (threads, native)",
+        },{
+            .name = "format",
+            .type = QEMU_OPT_STRING,
+            .help = "disk format (raw, qcow2, ...)",
+        },{
+            .name = "file",
+            .type = QEMU_OPT_STRING,
+            .help = "file name",
+        },{
+            .name = BDRV_OPT_READ_ONLY,
+            .type = QEMU_OPT_BOOL,
+            .help = "open image read-only",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Option list that parse_cfgstr() parses a device cfgstring into.  The
+ * descriptor table is intentionally empty so that arbitrary key=value
+ * pairs are accepted.
+ */
+QemuOptsList qemu_tcmu_export_opts = {
+ .name = "export",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_tcmu_export_opts.head),
+ .desc = {
+ /* no elements => accept any params */
+ { /* end of list */ }
+ },
+};
+
+/*
+ * Open the image described by @all_opts and create a TCMU export for it.
+ *
+ * The options listed in qemu_tcmu_common_export_opts are interpreted here;
+ * the remaining ones are passed to blk_new_open().  The new block backend
+ * is registered with the monitor under the "id" option, so it can be found
+ * again with blk_by_name().
+ *
+ * @opaque and @errp are unused; failures are reported with error_report().
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+int export_init_func(void *opaque, QemuOpts *all_opts, Error **errp)
+{
+    int flags = BDRV_O_RDWR;
+    const char *buf;
+    int ret = 0;
+    /*
+     * Must have a defined value even when no "cache" option is given,
+     * because it is consumed unconditionally further down.  Default to a
+     * write-back cache.
+     */
+    bool writethrough = false;
+    BlockBackend *blk;
+    int snapshot = 0;
+    Error *local_err = NULL;
+    QemuOpts *common_opts;
+    const char *id;
+    const char *aio;
+    const char *value;
+    QDict *bs_opts;
+    bool read_only = false;
+    const char *file;
+    TCMUExport *exp;
+
+    value = qemu_opt_get(all_opts, "cache");
+    if (value) {
+        if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
+            error_report("invalid cache option");
+            ret = -1;
+            goto err_too_early;
+        }
+        /* Specific options take precedence */
+        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) {
+            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT,
+                              !!(flags & BDRV_O_NOCACHE), &error_abort);
+        }
+        if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) {
+            qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH,
+                              !!(flags & BDRV_O_NO_FLUSH), &error_abort);
+        }
+        qemu_opt_unset(all_opts, "cache");
+    }
+
+    bs_opts = qdict_new();
+    /* all_opts->id also copied into one option in bs_opts */
+    qemu_opts_to_qdict(all_opts, bs_opts);
+
+    id = qdict_get_try_str(bs_opts, "id");
+    common_opts = qemu_opts_create(&qemu_tcmu_common_export_opts, id, 1,
+                                   &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        ret = -1;
+        goto err_no_opts;
+    }
+
+    trace_export_init_func();
+
+    /* Move the options handled here out of bs_opts. */
+    qemu_opts_absorb_qdict(common_opts, bs_opts, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        ret = -1;
+        goto early_err;
+    }
+
+    if (id) {
+        /* From here on the id is taken from common_opts. */
+        qdict_del(bs_opts, "id");
+    }
+
+    aio = qemu_opt_get(common_opts, "aio");
+    if (aio != NULL) {
+        if (!strcmp(aio, "native")) {
+            flags |= BDRV_O_NATIVE_AIO;
+        } else if (!strcmp(aio, "threads")) {
+            /* this is the default */
+        } else {
+            error_report("invalid aio option");
+            ret = -1;
+            goto early_err;
+        }
+    }
+
+    buf = qemu_opt_get(common_opts, "format");
+    if (buf != NULL) {
+        if (qdict_haskey(bs_opts, "driver")) {
+            error_report("Cannot specify both 'driver' and 'format'");
+            ret = -1;
+            goto early_err;
+        }
+        qdict_put_str(bs_opts, "driver", buf);
+    }
+
+    snapshot = qemu_opt_get_bool(common_opts, "snapshot", 0);
+    if (snapshot) {
+        flags |= BDRV_O_SNAPSHOT;
+    }
+
+    read_only = qemu_opt_get_bool(common_opts, BDRV_OPT_READ_ONLY, false);
+    if (read_only) {
+        flags &= ~BDRV_O_RDWR;
+    }
+
+    /*
+     * bdrv_open() defaults to the values in bdrv_flags (for compatibility
+     * with other callers) rather than what we want as the real defaults.
+     * Apply the defaults here instead.
+     */
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
+    qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
+    qdict_set_default_str(bs_opts, BDRV_OPT_READ_ONLY,
+                          read_only ? "on" : "off");
+
+    file = qemu_opt_get(common_opts, "file");
+    blk = blk_new_open(file, NULL, bs_opts, flags, &local_err);
+    /* bs_opts must not be released again by the cleanup code below. */
+    bs_opts = NULL;
+    if (!blk) {
+        error_report_err(local_err);
+        ret = -1;
+        goto err_no_bs_opts;
+    }
+
+    blk_set_enable_write_cache(blk, !writethrough);
+
+    id = qemu_opts_id(common_opts);
+    if (!monitor_add_blk(blk, id, &local_err)) {
+        error_report_err(local_err);
+        blk_unref(blk);
+        ret = -1;
+        goto err_no_bs_opts;
+    }
+
+    exp = tcmu_export_new(blk, flags & BDRV_O_RDWR, &local_err);
+    if (!exp) {
+        error_reportf_err(local_err, "Failed to create export: ");
+        ret = -1;
+        monitor_remove_blk(blk);
+    }
+    /*
+     * Drop the reference obtained from blk_new_open().  On success the
+     * export holds its own reference (taken in tcmu_export_new()), which
+     * is the one released by tcmu_export_close(); on failure this frees
+     * the backend.
+     */
+    blk_unref(blk);
+
+err_no_bs_opts:
+early_err:
+    qemu_opts_del(common_opts);
+err_no_opts:
+    qobject_unref(bs_opts);
+err_too_early:
+    return ret;
+}
+
+/*
+ * check_config callback of qemu_tcmu_handler: validate a device cfgstring.
+ *
+ * Returns true when check_cfgstr() accepts @cfgstr.  Otherwise returns
+ * false and stores a strdup()ed explanation in *@reason, also when
+ * check_cfgstr() failed without providing an error message.
+ * NOTE(review): *@reason is presumably released by the caller with
+ * free() -- confirm against libtcmu.
+ */
+static bool qemu_tcmu_check_config(const char *cfgstr, char **reason)
+{
+    Error *local_err = NULL;
+
+    if (check_cfgstr(cfgstr, &local_err)) {
+        /* error_free() accepts NULL; nothing is expected to be set here. */
+        error_free(local_err);
+        return true;
+    }
+
+    if (local_err) {
+        *reason = strdup(error_get_pretty(local_err));
+        error_free(local_err);
+    } else {
+        *reason = strdup("TCMU: Invalid device cfgstring");
+    }
+    return false;
+}
+
+/*
+ * "added" callback of qemu_tcmu_handler: bind a new TCMU device to the
+ * export described by its cfgstring and start polling the device's file
+ * descriptor in the AioContext of the export's block backend.
+ *
+ * Returns 0 on success, -1 when no export could be found or created.
+ */
+static int qemu_tcmu_added(struct tcmu_device *dev)
+{
+    TCMUExport *exp;
+    const char *cfgstr = tcmu_dev_get_cfgstring(dev);
+    Error *local_err = NULL;
+
+    exp = parse_cfgstr(cfgstr, &local_err);
+    if (!exp) {
+        /* Do not lose (or leak) an error handed back by the parser. */
+        if (local_err) {
+            error_report_err(local_err);
+        }
+        return -1;
+    }
+    error_free(local_err);
+
+    exp->tcmu_dev = dev;
+    tcmu_dev_set_private(dev, exp);
+    aio_set_fd_handler(blk_get_aio_context(exp->blk),
+                       tcmu_dev_get_fd(dev),
+                       true, qemu_tcmu_dev_event_handler,
+                       NULL, NULL, exp);
+    return 0;
+}
+
+/*
+ * Tear down @exp: stop watching its TCMU device's file descriptor, remove
+ * the block backend from the monitor, drop the export's reference to the
+ * backend, unlink the export from tcmu_exports and free it.
+ */
+static void tcmu_export_close(TCMUExport *exp)
+{
+    BlockBackend *backend = exp->blk;
+
+    /* Passing no callbacks unregisters the handler for this descriptor. */
+    aio_set_fd_handler(blk_get_aio_context(backend),
+                       tcmu_dev_get_fd(exp->tcmu_dev),
+                       false, NULL, NULL, NULL, NULL);
+
+    monitor_remove_blk(backend);
+    blk_unref(backend);
+
+    QLIST_REMOVE(exp, next);
+    g_free(exp);
+}
+
+/*
+ * "removed" callback of qemu_tcmu_handler: close the export stored as the
+ * device's private data, if there is one.
+ */
+static void qemu_tcmu_removed(struct tcmu_device *dev)
+{
+    TCMUExport *bound = tcmu_dev_get_private(dev);
+
+    if (!bound) {
+        return;
+    }
+    tcmu_export_close(bound);
+}
+
+/*
+ * Read handler for the tcmulib master file descriptor.  @opaque is the
+ * TCMUHandlerState registered in qemu_tcmu_start().
+ */
+static void qemu_tcmu_master_read(void *opaque)
+{
+    TCMUHandlerState *state = opaque;
+    struct tcmulib_context *ctx = state->tcmulib_ctx;
+
+    trace_qemu_tcmu_master_read();
+    tcmulib_master_fd_ready(ctx);
+}
+
+/*
+ * Handler description passed to tcmulib_initialize() in qemu_tcmu_start().
+ */
+static struct tcmulib_handler qemu_tcmu_handler = {
+ .name = "Handler for QEMU block devices",
+ .subtype = NULL, /* Set to a copy of the subtype in qemu_tcmu_start(). */
+ /*
+  * NOTE(review): check_cfgstr() expects "<subtype>/@<device_name>", which
+  * does not match this description -- confirm which one is intended.
+  */
+ .cfg_desc = "Format: device=<name>",
+ .added = qemu_tcmu_added,
+ .removed = qemu_tcmu_removed,
+ .check_config = qemu_tcmu_check_config,
+};
+
+/*
+ * Validate a device cfgstring of the form "<subtype>/@<device>".
+ *
+ * When <device> contains no further '@' it is taken as the name of an
+ * existing block backend, which must already have an export.  When it does
+ * contain '@' (inline options), only the prefix is checked.
+ *
+ * Returns true when @cfgstr is acceptable.  Otherwise returns false and
+ * sets @errp on every failure path.
+ */
+static bool check_cfgstr(const char *cfgstr,
+                         Error **errp)
+{
+    const char *subtype = qemu_tcmu_handler.subtype;
+    const char *dev_str, *device;
+    size_t subtype_len;
+    BlockBackend *blk;
+
+    if (!subtype) {
+        error_setg(errp, "TCMU Handler not started");
+        return false;
+    }
+
+    subtype_len = strlen(subtype);
+    if (strncmp(cfgstr, subtype, subtype_len) ||
+        cfgstr[subtype_len] != '/') {
+        error_setg(errp, "TCMU: Invalid subtype in device cfgstring: %s",
+                   cfgstr);
+        return false;
+    }
+
+    dev_str = &cfgstr[subtype_len + 1];
+    if (dev_str[0] != '@') {
+        error_setg(errp,
+                   "TCMU: Invalid cfgstring format. Must be @<device_name>");
+        return false;
+    }
+    device = &dev_str[1];
+
+    if (!strchr(device, '@')) {
+        /* Plain device name: it must refer to an already exported backend. */
+        blk = blk_by_name(device);
+        if (!blk) {
+            error_setg(errp, "TCMU: Device not found: %s", device);
+            return false;
+        }
+        if (!tcmu_export_lookup(blk)) {
+            error_setg(errp, "TCMU: Device not found: %s", device);
+            return false;
+        }
+    }
+    /* TODO: also check the id when inline options are given? */
+
+    return true;
+}
+
+/*
+ * Copy @opts to @to, replacing every '@' with ',' and terminating the
+ * result with a NUL byte.  @to must have room for strlen(@opts) + 1 bytes.
+ */
+static void tcmu_convert_delim(char *to, const char *opts)
+{
+    const char *src;
+    char *dst = to;
+
+    for (src = opts; *src != '\0'; src++, dst++) {
+        *dst = (*src == '@') ? ',' : *src;
+    }
+
+    if (dst) {
+        *dst = '\0';
+    }
+}
+/*
+ * Resolve a device cfgstring ("<subtype>/@<device>") to its export.
+ *
+ * If <device> contains no '@', it names an existing block backend whose
+ * export is looked up.  Otherwise the '@'-separated fields are treated as
+ * options ('@' is converted to ','), the image is opened and exported via
+ * export_init_func(), and the new export is looked up by its id.
+ *
+ * Errors are reported directly with error_report(); @errp is currently not
+ * filled in.
+ *
+ * Returns the export, or NULL on failure.
+ */
+static TCMUExport *parse_cfgstr(const char *cfgstr,
+                                Error **errp)
+{
+    const char *subtype = qemu_tcmu_handler.subtype;
+    const char *device, *id;
+    Error *local_err = NULL;
+    TCMUExport *exp = NULL;
+    QemuOpts *export_opts;
+    char *new_device;
+
+    if (!subtype) {
+        error_report("TCMU Handler not started");
+        return NULL;
+    }
+
+    /*
+     * Validate the "<subtype>/@" prefix before indexing past it; the
+     * string must not be assumed to be long enough.
+     */
+    if (!check_cfgstr(cfgstr, &local_err)) {
+        if (local_err) {
+            error_report_err(local_err);
+        }
+        return NULL;
+    }
+
+    /* Skip "<subtype>/@". */
+    device = &cfgstr[strlen(subtype) + 2];
+
+    if (!strchr(device, '@')) {
+        return tcmu_export_lookup(blk_by_name(device));
+    }
+
+    new_device = g_malloc0(strlen(device) + 1);
+    tcmu_convert_delim(new_device, device);
+
+    /*
+     * Parse new_device into a QemuOpts linked into qemu_tcmu_export_opts.
+     * The id ends up in QemuOpts->id and not as a separate option.
+     */
+    export_opts = qemu_opts_parse_noisily(&qemu_tcmu_export_opts,
+                                          new_device, false);
+    trace_qemu_tcmu_parse_cfgstr();
+    g_free(new_device);
+
+    if (!export_opts) {
+        return NULL;
+    }
+
+    /* The block backend is registered and looked up by this id. */
+    id = qemu_opts_id(export_opts);
+    if (!id) {
+        error_report("TCMU: Missing 'id' in device cfgstring: %s", cfgstr);
+        goto out;
+    }
+
+    if (export_init_func(NULL, export_opts, NULL)) {
+        goto out;
+    }
+
+    exp = tcmu_export_lookup(blk_by_name(id));
+
+out:
+    /* Released on every path, including export_init_func() failure. */
+    qemu_opts_del(export_opts);
+    return exp;
+}
+
+/*
+ * Stop the TCMU handler started by qemu_tcmu_start().
+ *
+ * Does nothing when the handler is not running.  Otherwise the master file
+ * descriptor handler is unregistered, the tcmulib context is closed and
+ * the handler state and subtype string are released, so that
+ * qemu_tcmu_start() can be called again.
+ */
+void qemu_tcmu_stop(void)
+{
+    if (!handler_state) {
+        return;
+    }
+
+    /* Stop polling the master fd before its context goes away. */
+    qemu_set_fd_handler(tcmulib_get_master_fd(handler_state->tcmulib_ctx),
+                        NULL, NULL, NULL);
+    tcmulib_close(handler_state->tcmulib_ctx);
+    g_free(handler_state);
+    handler_state = NULL;
+
+    /* The subtype was g_strdup()ed in qemu_tcmu_start(). */
+    g_free((char *)qemu_tcmu_handler.subtype);
+    qemu_tcmu_handler.subtype = NULL;
+}
+
+/*
+ * Start the TCMU handler for @subtype.
+ *
+ * Initializes tcmulib with qemu_tcmu_handler, starts polling the master
+ * file descriptor and registers the handler.  Sets @errp when the handler
+ * is already running or tcmulib cannot be initialized; in the latter case
+ * all state set up here is rolled back so that the call can be retried.
+ */
+void qemu_tcmu_start(const char *subtype, Error **errp)
+{
+    int fd;
+
+    trace_qemu_tcmu_start();
+    if (handler_state) {
+        error_setg(errp, "TCMU handler already started");
+        return;
+    }
+    assert(!qemu_tcmu_handler.subtype);
+    qemu_tcmu_handler.subtype = g_strdup(subtype);
+    handler_state = g_new0(TCMUHandlerState, 1);
+    handler_state->tcmulib_ctx = tcmulib_initialize(&qemu_tcmu_handler, 1);
+
+    if (!handler_state->tcmulib_ctx) {
+        error_setg(errp, "Failed to initialize tcmulib");
+        g_free(handler_state);
+        handler_state = NULL;
+        /* Reset the subtype too, otherwise the assert above fires on retry. */
+        g_free((char *)qemu_tcmu_handler.subtype);
+        qemu_tcmu_handler.subtype = NULL;
+        return;
+    }
+
+    fd = tcmulib_get_master_fd(handler_state->tcmulib_ctx);
+    qemu_set_fd_handler(fd, qemu_tcmu_master_read, NULL, handler_state);
+    trace_qemu_tcmu_start_register();
+    tcmulib_register(handler_state->tcmulib_ctx);
+}
+
+/*
+ * Create an export for @blk and add it to tcmu_exports.
+ *
+ * The export takes its own reference to @blk.  @writable is recorded in
+ * the export.
+ *
+ * Returns the new export, or NULL with @errp set when @blk already has an
+ * export.
+ */
+TCMUExport *tcmu_export_new(BlockBackend *blk, bool writable, Error **errp)
+{
+    TCMUExport *created;
+
+    if (tcmu_export_lookup(blk) != NULL) {
+        error_setg(errp, "Block device already added");
+        return NULL;
+    }
+
+    created = g_new0(TCMUExport, 1);
+    created->blk = blk;
+    blk_ref(blk);
+    created->writable = writable;
+    QLIST_INSERT_HEAD(&tcmu_exports, created, next);
+
+    return created;
+}
diff --git a/tcmu/trace-events b/tcmu/trace-events
new file mode 100644
index 0000000..62ad30e
--- /dev/null
+++ b/tcmu/trace-events
@@ -0,0 +1,12 @@
+# tcmu/tcmu.c
+
+# uint64_t arguments are formatted with PRIu64 so that the format matches
+# the argument type on every host.
+qemu_tcmu_aio_cb(void) "aio cb"
+qemu_tcmu_handle_cmd(uint8_t cdb) "handle cmd: 0x%x"
+qemu_tcmu_handle_cmd_read(uint64_t offset) "read at %"PRIu64
+qemu_tcmu_handle_cmd_write(uint64_t offset) "write at %"PRIu64
+qemu_tcmu_handle_cmd_unknown_cmd(uint8_t cdb) "unknown cmd: 0x%x"
+qemu_tcmu_master_read(void) "master read"
+qemu_tcmu_start(void) "start"
+qemu_tcmu_start_register(void) "register"
+qemu_tcmu_parse_cfgstr(void) "parse noisily"
+export_init_func(void) "parse common"
--
1.8.3.1
- [Qemu-block] [PATCH] tcmu: Introduce qemu-tcmu utility,
Yaowei Bai <=