[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v6 05/10] hw/scsi: add persistent reservation in/out api for scsi device
From: Stefan Hajnoczi
Subject: Re: [PATCH v6 05/10] hw/scsi: add persistent reservation in/out api for scsi device
Date: Thu, 4 Jul 2024 20:28:55 +0200
On Thu, Jun 13, 2024 at 03:13:22PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations in the
> SCSI device layer. By introducing the persistent
> reservation in/out api, this enables the SCSI device
> to perform reservation-related tasks, including querying
> keys, querying reservation status, registering reservation
> keys, initiating and releasing reservations, as well as
> clearing and preempting reservations held by other keys.
>
> These operations are crucial for management and control of
> shared storage resources in a persistent manner.
>
> Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
> hw/scsi/scsi-disk.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 352 insertions(+)
>
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..0e964dbd87 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
> #include "migration/vmstate.h"
> #include "hw/scsi/emulation.h"
> #include "scsi/constants.h"
> +#include "scsi/utils.h"
> #include "sysemu/block-backend.h"
> #include "sysemu/blockdev.h"
> #include "hw/block/block.h"
> @@ -42,6 +43,7 @@
> #include "qemu/cutils.h"
> #include "trace.h"
> #include "qom/object.h"
> +#include "block/block_int.h"
>
> #ifdef __linux
> #include <scsi/sg.h>
> @@ -1474,6 +1476,346 @@ static void scsi_disk_emulate_read_data(SCSIRequest
> *req)
> scsi_req_complete(&r->req, GOOD);
> }
>
> +typedef struct SCSIPrReadKeys {
> + uint32_t generation;
> + uint32_t num_keys;
> + uint64_t *keys;
> + void *req;
> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> + uint32_t generation;
> + uint64_t key;
> + BlockPrType type;
> + void *req;
> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> + int num_keys;
> + uint8_t *buf;
> + SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> + SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> + assert(blk_get_aio_context(s->qdev.conf.blk) ==
> + qemu_get_current_aio_context());
> +
> + assert(r->req.aiocb != NULL);
> + r->req.aiocb = NULL;
> +
> + if (scsi_disk_req_check_error(r, ret, true)) {
> + goto done;
> + }
> +
> + buf = scsi_req_get_buf(&r->req);
> + num_keys = MIN(blk_keys->num_keys, ret);
> + blk_keys->generation = cpu_to_be32(blk_keys->generation);
> + memcpy(&buf[0], &blk_keys->generation, 4);
> + for (int i = 0; i < num_keys; i++) {
> + blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> + memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8);
> + }
> + num_keys = cpu_to_be32(num_keys * 8);
> + memcpy(&buf[4], &num_keys, 4);
> +
> + scsi_req_data(&r->req, r->buflen);
> +done:
> + scsi_req_unref(&r->req);
> + g_free(blk_keys->keys);
> + g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> + SCSIPrReadKeys *blk_keys;
> + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> + int buflen = MIN(r->req.cmd.xfer, r->buflen);
> + int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
> +
> + blk_keys = g_new0(SCSIPrReadKeys, 1);
> + blk_keys->generation = 0;
> + /* num_keys is the maximum number of keys that can be transmitted */
> + blk_keys->num_keys = num_keys;
> + blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> + blk_keys->req = r;
> +
> + /* The request is used as the AIO opaque value, so add a ref. */
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk,
> &blk_keys->generation,
> + blk_keys->num_keys, blk_keys->keys,
> + scsi_pr_read_keys_complete,
> blk_keys);
> + return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> + uint8_t *buf;
> + uint32_t additional_len = 0;
> + SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> + SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> + assert(blk_get_aio_context(s->qdev.conf.blk) ==
> + qemu_get_current_aio_context());
> +
> + assert(r->req.aiocb != NULL);
> + r->req.aiocb = NULL;
> +
> + if (scsi_disk_req_check_error(r, ret, true)) {
> + goto done;
> + }
> +
> + buf = scsi_req_get_buf(&r->req);
> + blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> + memcpy(&buf[0], &blk_rsv->generation, 4);
> + if (ret) {
> + additional_len = cpu_to_be32(16);
> + blk_rsv->key = cpu_to_be64(blk_rsv->key);
> + memcpy(&buf[8], &blk_rsv->key, 8);
> + buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
> + } else {
> + additional_len = cpu_to_be32(0);
> + }
> +
> + memcpy(&buf[4], &additional_len, 4);
> + scsi_req_data(&r->req, r->buflen);
> +
> +done:
> + scsi_req_unref(&r->req);
> + g_free(blk_rsv);
> +}
> +
> +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req)
> +{
> + SCSIPrReadReservation *blk_rsv;
> + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + blk_rsv = g_malloc(sizeof(*blk_rsv));
> + blk_rsv->generation = 0;
> + blk_rsv->key = 0;
> + blk_rsv->type = 0;
> + blk_rsv->req = r;
> +
> + /* The request is used as the AIO opaque value, so add a ref. */
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk,
> + &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type,
> + scsi_pr_read_reservation_complete, blk_rsv);
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_report_capabilities(SCSIRequest *req)
> +{
> + SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> + BlockDriverState *bs = blk_bs(s->qdev.conf.blk);
> + uint8_t blk_pr_cap = bs->file->bs->bl.pr_cap;
I'm not sure that bs->file->bs is always valid here. Block layer code
typically checks for !bs and !bs->drv before dereferencing a node.
Also, bs->file could be NULL if the SCSI disk points directly to
--blockdev file without a --blockdev raw on top. I think the block layer
should propagate pr_cap from the leaves of the block graph to the root
node via bdrv_merge_limits() so that traversing the graph (->file) is
not necessary. Instead this line should just read bs->bl.pr_cap, i.e.
"uint8_t blk_pr_cap = bs->bl.pr_cap;".
> + uint8_t *buf = scsi_req_get_buf(req);
> + uint16_t pr_cap;
> + /*
> + * according to response of report_capabilities
> + * command of chapter 6.13 of spc4
> + */
> + int len = sizeof(uint16_t) * 2 + sizeof(uint8_t) * 4;
> +
> + if (len > r->buflen) {
> + return -EINVAL;
> + }
> +
> + len = cpu_to_be16(len);
> + memcpy(&buf[0], &len, 2);
> + /* enable PTPL_C */
> + buf[2] = 1;
> + buf[3] = (blk_pr_cap & BLK_PR_CAP_PTPL) ? 1 : 0;
> + /* enable TMV */
> + buf[3] |= 1 << 7;
> +
> + /* enable persistent reservation type */
> + pr_cap = block_pr_cap_to_scsi(blk_pr_cap);
> + buf[4] = pr_cap >> 8 & 0xff;
> + buf[5] = pr_cap & 0xff;
> +
> + scsi_req_data(&r->req, len);
> + return 0;
> +}
> +
> +static void scsi_aio_pr_complete(void *opaque, int ret)
> +{
> + SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> + /* The request must only run in the BlockBackend's AioContext */
> + assert(blk_get_aio_context(s->qdev.conf.blk) ==
> + qemu_get_current_aio_context());
> +
> + assert(r->req.aiocb != NULL);
> + r->req.aiocb = NULL;
> +
> + if (scsi_disk_req_check_error(r, ret, true)) {
> + goto done;
> + }
> +
> + scsi_req_complete(&r->req, GOOD);
> +
> +done:
> + scsi_req_unref(&r->req);
> +}
> +
> +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t r_key,
> + uint64_t sa_key, SCSIPrType type,
> + uint8_t aptpl, bool ignore_key)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + /* The request is used as the AIO opaque value, so add a ref. */
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, r_key, sa_key,
> + scsi_pr_type_to_block(type),
> + aptpl ? true : false,
> + ignore_key, scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t r_key,
> + SCSIPrType type)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + /* The request is used as the AIO opaque value, so add a ref. */
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, r_key,
> + scsi_pr_type_to_block(type),
> + scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t r_key,
> + SCSIPrType type)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + /* The request is used as the AIO opaque value, so add a ref. */
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, r_key,
> + scsi_pr_type_to_block(type),
> + scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t r_key)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + /* The request is used as the AIO opaque value, so add a ref. */
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, r_key,
> + scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t r_key,
> + uint64_t sa_key, SCSIPrType type,
> + bool abort)
> +{
> + SCSIRequest *req = &r->req;
> + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> + /* The request is used as the AIO opaque value, so add a ref. */
> + scsi_req_ref(&r->req);
> + r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, r_key, sa_key,
> + scsi_pr_type_to_block(type), abort,
> + scsi_aio_pr_complete, r);
> +
> + return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_in(SCSIRequest *req)
> +{
> + int rc;
> + SCSIPrInAction action = req->cmd.buf[1] & 0x1f;
> +
> + switch (action) {
> + case SCSI_PR_IN_READ_KEYS:
> + rc = scsi_disk_emulate_pr_read_keys(req);
> + break;
> + case SCSI_PR_IN_READ_RESERVATION:
> + rc = scsi_disk_emulate_pr_read_reservation(req);
> + break;
> + case SCSI_PR_IN_REPORT_CAPABILITIES:
> + rc = scsi_disk_emulate_pr_report_capabilities(req);
> + break;
> + default:
> + return -ENOTSUP;
> + }
> +
> + return rc;
> +}
> +
> +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf)
> +{
> + int rc;
> + uint8_t aptpl;
> + uint64_t r_key, sa_key;
> + SCSIPrOutAction action;
> + SCSIPrScope scope;
> + SCSIPrType type;
> + SCSIRequest *req = &r->req;
> +
> + memcpy(&r_key, &inbuf[0], 8);
> + r_key = be64_to_cpu(r_key);
> + memcpy(&sa_key, &inbuf[8], 8);
> + sa_key = be64_to_cpu(sa_key);
> + memcpy(&aptpl, &inbuf[20], 1);
> + aptpl = aptpl & 0x01;
> + action = req->cmd.buf[1] & 0x1f;
> + scope = (req->cmd.buf[2] >> 4) & 0x0f;
> + type = req->cmd.buf[2] & 0x0f;
> +
> + if (scope != SCSI_PR_LU_SCOPE) {
> + return -ENOTSUP;
> + }
> +
> + switch (action) {
> + case SCSI_PR_OUT_REGISTER:
> + rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type,
> + aptpl, false);
> + break;
> + case SCSI_PR_OUT_REG_AND_IGNORE_KEY:
> + rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, aptpl,
> true);
> + break;
> + case SCSI_PR_OUT_RESERVE:
> + rc = scsi_disk_emulate_pr_reserve(r, r_key, type);
> + break;
> + case SCSI_PR_OUT_RELEASE:
> + rc = scsi_disk_emulate_pr_release(r, r_key, type);
> + break;
> + case SCSI_PR_OUT_CLEAR:
> + rc = scsi_disk_emulate_pr_clear(r, r_key);
> + break;
> + case SCSI_PR_OUT_PREEMPT:
> + rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, false);
> + break;
> + case SCSI_PR_OUT_PREEMPT_AND_ABORT:
> + rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, true);
> + break;
> + default:
> + return -ENOTSUP;
> + }
> +
> + return rc;
> +}
> +
> static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
> uint8_t *inbuf, int inlen)
> {
> @@ -1957,6 +2299,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest
> *req)
> scsi_req_complete(&r->req, GOOD);
> break;
>
> + case PERSISTENT_RESERVE_OUT:
> + scsi_disk_emulate_pr_out(r, r->iov.iov_base);
> + break;
> default:
> abort();
> }
> @@ -2213,6 +2558,11 @@ static int32_t scsi_disk_emulate_command(SCSIRequest
> *req, uint8_t *buf)
> case FORMAT_UNIT:
> trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
> break;
> + case PERSISTENT_RESERVE_OUT:
> + break;
> + case PERSISTENT_RESERVE_IN:
> + scsi_disk_emulate_pr_in(req);
> + return 0;
> default:
> trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
> scsi_command_name(buf[0]));
> @@ -2632,6 +2982,8 @@ static const SCSIReqOps *const
> scsi_disk_reqops_dispatch[256] = {
> [VERIFY_12] = &scsi_disk_emulate_reqops,
> [VERIFY_16] = &scsi_disk_emulate_reqops,
> [FORMAT_UNIT] = &scsi_disk_emulate_reqops,
> + [PERSISTENT_RESERVE_IN] = &scsi_disk_emulate_reqops,
> + [PERSISTENT_RESERVE_OUT] = &scsi_disk_emulate_reqops,
>
> [READ_6] = &scsi_disk_dma_reqops,
> [READ_10] = &scsi_disk_dma_reqops,
> --
> 2.20.1
>
signature.asc
Description: PGP signature