qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES o


From: Alex Bligh
Subject: Re: [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server
Date: Sat, 9 Apr 2016 11:54:51 +0100

On 8 Apr 2016, at 23:05, Eric Blake <address@hidden> wrote:

> RFC because there is still discussion on the NBD list about
> adding an NBD_OPT_ to let the client suggest server defaults
> related to scanning for zeroes during NBD_CMD_WRITE, which may
> tweak this patch.
> 
> Upstream NBD protocol recently added the ability to efficiently
> write zeroes without having to send the zeroes over the wire,
> along with a flag to control whether the client wants a hole.
> 
> Signed-off-by: Eric Blake <address@hidden>
> ---
> include/block/nbd.h |  5 ++++-
> nbd/server.c        | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> 2 files changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 4c57754..a1d955c 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_FLAG_SEND_FUA       (1 << 3)        /* Send FUA (Force Unit 
> Access) */
> #define NBD_FLAG_ROTATIONAL     (1 << 4)        /* Use elevator algorithm - 
> rotational media */
> #define NBD_FLAG_SEND_TRIM      (1 << 5)        /* Send TRIM (discard) */
> +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6)     /* Send WRITE_ZEROES */
> #define NBD_FLAG_SEND_CLOSE     (1 << 8)        /* Send CLOSE */
> 
> /* New-style handshake (global) flags, sent from server to client, and
> @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_REP_ERR_UNKNOWN     ((UINT32_C(1) << 31) | 6) /* Export unknown */
> 
> /* Request flags, sent from client to server during transmission phase */
> -#define NBD_CMD_FLAG_FUA        (1 << 0)
> +#define NBD_CMD_FLAG_FUA        (1 << 0) /* 'force unit access' during write 
> */
> +#define NBD_CMD_FLAG_NO_HOLE    (1 << 1) /* don't punch hole on zero run */
> 
> /* Supported request types */
> enum {
> @@ -101,6 +103,7 @@ enum {
>     NBD_CMD_DISC = 2,
>     NBD_CMD_FLUSH = 3,
>     NBD_CMD_TRIM = 4,
> +    NBD_CMD_WRITE_ZEROES = 5,
>     NBD_CMD_CLOSE = 7,
> };
> 
> diff --git a/nbd/server.c b/nbd/server.c
> index 2a6eaf2..09af915 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
> *data)
>     int rc;
>     const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
>                               NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
> -                              NBD_FLAG_SEND_CLOSE);
> +                              NBD_FLAG_SEND_CLOSE |
> +                              NBD_FLAG_SEND_WRITE_ZEROES);
>     bool oldStyle;
>     size_t len;
> 
> @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, 
> struct nbd_request *reque
>         goto out;
>     }
> 
> -    if (request->flags & ~NBD_CMD_FLAG_FUA) {
> +    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
>         LOG("unsupported flags (got 0x%x)", request->flags);
>         return -EINVAL;
>     }
> @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, 
> struct nbd_request *reque
>     TRACE("Decoding type");
> 
>     command = request->type;
> -    if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
> +    if (request->flags & NBD_CMD_FLAG_NO_HOLE &&
> +        !(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) {
> +        LOG("NO_HOLE flag valid only with write operation");
> +        return -EINVAL;
> +    }
> +    if (command == NBD_CMD_READ || command == NBD_CMD_WRITE ||
> +        command == NBD_CMD_WRITE_ZEROES) {
>         if (request->len > NBD_MAX_BUFFER_SIZE) {
>             LOG("len (%" PRIu32" ) is larger than max len (%u)",
>                 request->len, NBD_MAX_BUFFER_SIZE);
> @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque)
>     struct nbd_reply reply;
>     ssize_t ret;
>     uint32_t command;
> +    int flags;
> 
>     TRACE("Reading request.");
>     if (client->closing) {
> @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque)
> 
>         TRACE("Writing to device");
> 
> +        /* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we
> +         * make that override a server that is set to look for
> +         * holes? */

No, and I reckon that's an error in the spec. There is a 'MUST'
which should be a 'SHOULD'. Good luck with any FS which supports
dedupe for instance!

>         ret = blk_write(exp->blk,
>                         (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
>                         req->data, request.len / BDRV_SECTOR_SIZE);
> @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque)
>             goto out;
>         }
>         break;
> +    case NBD_CMD_WRITE_ZEROES:
> +        TRACE("Request type is WRITE_ZEROES");
> +
> +        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
> +            TRACE("Server is read-only, return error");
> +            reply.error = EROFS;
> +            goto error_reply;
> +        }
> +
> +        TRACE("Writing to device");
> +
> +        flags = 0;
> +        if (request.flags & NBD_CMD_FLAG_FUA) {
> +            flags |= BDRV_REQ_FUA;
> +        }
> +        if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
> +            /* FIXME: should this depend on whether the server is set to
> +               look for holes? */
> +            flags |= BDRV_REQ_MAY_UNMAP;
> +        }
> +        ret = blk_write_zeroes(exp->blk,
> +                               ((request.from + exp->dev_offset) /
> +                                BDRV_SECTOR_SIZE),
> +                               request.len / BDRV_SECTOR_SIZE,
> +                               flags);
> +        if (ret < 0) {
> +            LOG("writing to file failed");
> +            reply.error = -ret;
> +            goto error_reply;
> +        }
> +
> +        /* FIXME: do we need FUA flush here, if we also passed it to
> +         * blk_write_zeroes? */

I don't think so. blk_write_zeroes with BDRV_REQ_FUA should
make *that* request access the media, but FLUSH makes ALL PREVIOUS
requests access the media, so you are doing too much work here
I think.

> +        if (request.flags & NBD_CMD_FLAG_FUA) {
> +            ret = blk_co_flush(exp->blk);
> +            if (ret < 0) {
> +                LOG("flush failed");
> +                reply.error = -ret;
> +                goto error_reply;
> +            }
> +        }
> +
> +        if (nbd_co_send_reply(req, &reply, 0) < 0) {
> +            goto out;
> +        }
> +        break;
>     case NBD_CMD_DISC:
>         TRACE("Request type is DISCONNECT");
>         goto out;
> -- 
> 2.5.5
> 
> 

-- 
Alex Bligh







reply via email to

[Prev in Thread] Current Thread [Next in Thread]