qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v6 05/10] hw/scsi: add persistent reservation in/out api for


From: Stefan Hajnoczi
Subject: Re: [PATCH v6 05/10] hw/scsi: add persistent reservation in/out api for scsi device
Date: Thu, 4 Jul 2024 20:28:55 +0200

On Thu, Jun 13, 2024 at 03:13:22PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations in the
> SCSI device layer. By introducing the persistent
> reservation in/out api, this enables the SCSI device
> to perform reservation-related tasks, including querying
> keys, querying reservation status, registering reservation
> keys, initiating and releasing reservations, as well as
> clearing and preempting reservations held by other keys.
> 
> These operations are crucial for management and control of
> shared storage resources in a persistent manner.
> 
> Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>  hw/scsi/scsi-disk.c | 352 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 352 insertions(+)
> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..0e964dbd87 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/scsi/emulation.h"
>  #include "scsi/constants.h"
> +#include "scsi/utils.h"
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
> @@ -42,6 +43,7 @@
>  #include "qemu/cutils.h"
>  #include "trace.h"
>  #include "qom/object.h"
> +#include "block/block_int.h"
>  
>  #ifdef __linux
>  #include <scsi/sg.h>
> @@ -1474,6 +1476,346 @@ static void scsi_disk_emulate_read_data(SCSIRequest 
> *req)
>      scsi_req_complete(&r->req, GOOD);
>  }
>  
> +typedef struct SCSIPrReadKeys {
> +    uint32_t generation;
> +    uint32_t num_keys;
> +    uint64_t *keys;
> +    void     *req;
> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> +    uint32_t generation;
> +    uint64_t key;
> +    BlockPrType type;
> +    void *req;
> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> +    int num_keys;
> +    uint8_t *buf;
> +    SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    num_keys = MIN(blk_keys->num_keys, ret);
> +    blk_keys->generation = cpu_to_be32(blk_keys->generation);
> +    memcpy(&buf[0], &blk_keys->generation, 4);
> +    for (int i = 0; i < num_keys; i++) {
> +        blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> +        memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8);
> +    }
> +    num_keys = cpu_to_be32(num_keys * 8);
> +    memcpy(&buf[4], &num_keys, 4);
> +
> +    scsi_req_data(&r->req, r->buflen);
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_keys->keys);
> +    g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> +    SCSIPrReadKeys *blk_keys;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +    int buflen = MIN(r->req.cmd.xfer, r->buflen);
> +    int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
> +
> +    blk_keys = g_new0(SCSIPrReadKeys, 1);
> +    blk_keys->generation = 0;
> +    /* num_keys is the maximum number of keys that can be transmitted */
> +    blk_keys->num_keys = num_keys;
> +    blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> +    blk_keys->req = r;
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, 
> &blk_keys->generation,
> +                                        blk_keys->num_keys, blk_keys->keys,
> +                                        scsi_pr_read_keys_complete, 
> blk_keys);
> +    return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> +    uint8_t *buf;
> +    uint32_t additional_len = 0;
> +    SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> +    memcpy(&buf[0], &blk_rsv->generation, 4);
> +    if (ret) {
> +        additional_len = cpu_to_be32(16);
> +        blk_rsv->key = cpu_to_be64(blk_rsv->key);
> +        memcpy(&buf[8], &blk_rsv->key, 8);
> +        buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
> +    } else {
> +        additional_len = cpu_to_be32(0);
> +    }
> +
> +    memcpy(&buf[4], &additional_len, 4);
> +    scsi_req_data(&r->req, r->buflen);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_rsv);
> +}
> +
> +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req)
> +{
> +    SCSIPrReadReservation *blk_rsv;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    blk_rsv = g_malloc(sizeof(*blk_rsv));
> +    blk_rsv->generation = 0;
> +    blk_rsv->key = 0;
> +    blk_rsv->type = 0;
> +    blk_rsv->req = r;
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk,
> +                   &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type,
> +                   scsi_pr_read_reservation_complete, blk_rsv);
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_report_capabilities(SCSIRequest *req)
> +{
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +    BlockDriverState *bs = blk_bs(s->qdev.conf.blk);
> +    uint8_t blk_pr_cap = bs->file->bs->bl.pr_cap;

I'm not sure that file->bs is always valid here. Block layer code
typically checks for !bs and !bs->drv.

Also, ->file could be NULL if the SCSI disk points directly to
--blockdev file without a --blockdev raw on top. I think the block layer
should propagate pr_cap from the leaves of the block graph to the root
node via bdrv_merge_limits() so that traversing the graph (->file) is
not necessary. Instead this line should just be bs->bl.pr_cap.

> +    uint8_t *buf = scsi_req_get_buf(req);
> +    uint16_t pr_cap;
> +    /*
> +     * according to response of report_capabilities
> +     * command of chapter 6.13 of spc4
> +     */
> +    int len = sizeof(uint16_t) * 2 + sizeof(uint8_t) * 4;
> +
> +    if (len > r->buflen) {
> +        return -EINVAL;
> +    }
> +
> +    len = cpu_to_be16(len);
> +    memcpy(&buf[0], &len, 2);
> +    /* enable PTPL_C */
> +    buf[2] = 1;
> +    buf[3] = (blk_pr_cap & BLK_PR_CAP_PTPL) ? 1 : 0;
> +    /* enable TMV */
> +    buf[3] |= 1 << 7;
> +
> +    /* enable persistent reservation type */
> +    pr_cap = block_pr_cap_to_scsi(blk_pr_cap);
> +    buf[4] = pr_cap >> 8 & 0xff;
> +    buf[5] = pr_cap & 0xff;
> +
> +    scsi_req_data(&r->req, len);
> +    return 0;
> +}
> +
> +static void scsi_aio_pr_complete(void *opaque, int ret)
> +{
> +    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    /* The request must only run in the BlockBackend's AioContext */
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +           qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    scsi_req_complete(&r->req, GOOD);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +}
> +
> +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t r_key,
> +                                         uint64_t sa_key, SCSIPrType type,
> +                                         uint8_t aptpl, bool ignore_key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, r_key, sa_key,
> +                                       scsi_pr_type_to_block(type),
> +                                       aptpl ? true : false,
> +                                       ignore_key, scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t r_key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, r_key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t r_key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, r_key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t r_key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, r_key,
> +                                    scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t r_key,
> +                                        uint64_t sa_key, SCSIPrType type,
> +                                        bool abort)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    /* The request is used as the AIO opaque value, so add a ref.  */
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, r_key, sa_key,
> +                                      scsi_pr_type_to_block(type), abort,
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_in(SCSIRequest *req)
> +{
> +    int rc;
> +    SCSIPrInAction action = req->cmd.buf[1] & 0x1f;
> +
> +    switch (action) {
> +    case SCSI_PR_IN_READ_KEYS:
> +        rc = scsi_disk_emulate_pr_read_keys(req);
> +        break;
> +    case SCSI_PR_IN_READ_RESERVATION:
> +        rc = scsi_disk_emulate_pr_read_reservation(req);
> +        break;
> +    case SCSI_PR_IN_REPORT_CAPABILITIES:
> +        rc = scsi_disk_emulate_pr_report_capabilities(req);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
> +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf)
> +{
> +    int rc;
> +    uint8_t aptpl;
> +    uint64_t r_key, sa_key;
> +    SCSIPrOutAction action;
> +    SCSIPrScope scope;
> +    SCSIPrType type;
> +    SCSIRequest *req = &r->req;
> +
> +    memcpy(&r_key, &inbuf[0], 8);
> +    r_key = be64_to_cpu(r_key);
> +    memcpy(&sa_key, &inbuf[8], 8);
> +    sa_key = be64_to_cpu(sa_key);
> +    memcpy(&aptpl, &inbuf[20], 1);
> +    aptpl =  aptpl & 0x01;
> +    action = req->cmd.buf[1] & 0x1f;
> +    scope = (req->cmd.buf[2] >> 4) & 0x0f;
> +    type = req->cmd.buf[2] & 0x0f;
> +
> +    if (scope != SCSI_PR_LU_SCOPE) {
> +        return -ENOTSUP;
> +    }
> +
> +    switch (action) {
> +    case SCSI_PR_OUT_REGISTER:
> +        rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type,
> +                                           aptpl, false);
> +        break;
> +    case SCSI_PR_OUT_REG_AND_IGNORE_KEY:
> +        rc = scsi_disk_emulate_pr_register(r, r_key, sa_key, type, aptpl, 
> true);
> +        break;
> +    case SCSI_PR_OUT_RESERVE:
> +        rc = scsi_disk_emulate_pr_reserve(r, r_key, type);
> +        break;
> +    case SCSI_PR_OUT_RELEASE:
> +        rc = scsi_disk_emulate_pr_release(r, r_key, type);
> +        break;
> +    case SCSI_PR_OUT_CLEAR:
> +        rc = scsi_disk_emulate_pr_clear(r, r_key);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT:
> +        rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, false);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT_AND_ABORT:
> +        rc = scsi_disk_emulate_pr_preempt(r, r_key, sa_key, type, true);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
>  static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
>                                         uint8_t *inbuf, int inlen)
>  {
> @@ -1957,6 +2299,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest 
> *req)
>          scsi_req_complete(&r->req, GOOD);
>          break;
>  
> +    case PERSISTENT_RESERVE_OUT:
> +        scsi_disk_emulate_pr_out(r, r->iov.iov_base);
> +        break;
>      default:
>          abort();
>      }
> @@ -2213,6 +2558,11 @@ static int32_t scsi_disk_emulate_command(SCSIRequest 
> *req, uint8_t *buf)
>      case FORMAT_UNIT:
>          trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
>          break;
> +    case PERSISTENT_RESERVE_OUT:
> +        break;
> +    case PERSISTENT_RESERVE_IN:
> +        scsi_disk_emulate_pr_in(req);
> +        return 0;
>      default:
>          trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
>                                                  scsi_command_name(buf[0]));
> @@ -2632,6 +2982,8 @@ static const SCSIReqOps *const 
> scsi_disk_reqops_dispatch[256] = {
>      [VERIFY_12]                       = &scsi_disk_emulate_reqops,
>      [VERIFY_16]                       = &scsi_disk_emulate_reqops,
>      [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_IN]           = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_OUT]          = &scsi_disk_emulate_reqops,
>  
>      [READ_6]                          = &scsi_disk_dma_reqops,
>      [READ_10]                         = &scsi_disk_dma_reqops,
> -- 
> 2.20.1
> 

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]