qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-block] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on se


From: Eric Blake
Subject: [Qemu-block] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server
Date: Fri, 8 Apr 2016 16:05:57 -0600

RFC because there is still discussion on the NBD list about
adding an NBD_OPT_ to let the client suggest server defaults
related to scanning for zeroes during NBD_CMD_WRITE, which may
tweak this patch.

Upstream NBD protocol recently added the ability to efficiently
write zeroes without having to send the zeroes over the wire,
along with a flag to control whether the client wants a hole.

Signed-off-by: Eric Blake <address@hidden>
---
 include/block/nbd.h |  5 ++++-
 nbd/server.c        | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 4c57754..a1d955c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
 #define NBD_FLAG_SEND_FUA       (1 << 3)        /* Send FUA (Force Unit 
Access) */
 #define NBD_FLAG_ROTATIONAL     (1 << 4)        /* Use elevator algorithm - 
rotational media */
 #define NBD_FLAG_SEND_TRIM      (1 << 5)        /* Send TRIM (discard) */
+#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6)     /* Send WRITE_ZEROES */
 #define NBD_FLAG_SEND_CLOSE     (1 << 8)        /* Send CLOSE */

 /* New-style handshake (global) flags, sent from server to client, and
@@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply;
 #define NBD_REP_ERR_UNKNOWN     ((UINT32_C(1) << 31) | 6) /* Export unknown */

 /* Request flags, sent from client to server during transmission phase */
-#define NBD_CMD_FLAG_FUA        (1 << 0)
+#define NBD_CMD_FLAG_FUA        (1 << 0) /* 'force unit access' during write */
+#define NBD_CMD_FLAG_NO_HOLE    (1 << 1) /* don't punch hole on zero run */

 /* Supported request types */
 enum {
@@ -101,6 +103,7 @@ enum {
     NBD_CMD_DISC = 2,
     NBD_CMD_FLUSH = 3,
     NBD_CMD_TRIM = 4,
+    NBD_CMD_WRITE_ZEROES = 5,
     NBD_CMD_CLOSE = 7,
 };

diff --git a/nbd/server.c b/nbd/server.c
index 2a6eaf2..09af915 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
*data)
     int rc;
     const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
                               NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
-                              NBD_FLAG_SEND_CLOSE);
+                              NBD_FLAG_SEND_CLOSE |
+                              NBD_FLAG_SEND_WRITE_ZEROES);
     bool oldStyle;
     size_t len;

@@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, 
struct nbd_request *reque
         goto out;
     }

-    if (request->flags & ~NBD_CMD_FLAG_FUA) {
+    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
         LOG("unsupported flags (got 0x%x)", request->flags);
         return -EINVAL;
     }
@@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, 
struct nbd_request *reque
     TRACE("Decoding type");

     command = request->type;
-    if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
+    if (request->flags & NBD_CMD_FLAG_NO_HOLE &&
+        !(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) {
+        LOG("NO_HOLE flag valid only with write operation");
+        return -EINVAL;
+    }
+    if (command == NBD_CMD_READ || command == NBD_CMD_WRITE ||
+        command == NBD_CMD_WRITE_ZEROES) {
         if (request->len > NBD_MAX_BUFFER_SIZE) {
             LOG("len (%" PRIu32" ) is larger than max len (%u)",
                 request->len, NBD_MAX_BUFFER_SIZE);
@@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque)
     struct nbd_reply reply;
     ssize_t ret;
     uint32_t command;
+    int flags;

     TRACE("Reading request.");
     if (client->closing) {
@@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque)

         TRACE("Writing to device");

+        /* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we
+         * make that override a server that is set to look for
+         * holes? */
         ret = blk_write(exp->blk,
                         (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
                         req->data, request.len / BDRV_SECTOR_SIZE);
@@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque)
             goto out;
         }
         break;
+    case NBD_CMD_WRITE_ZEROES:
+        TRACE("Request type is WRITE_ZEROES");
+
+        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
+            TRACE("Server is read-only, return error");
+            reply.error = EROFS;
+            goto error_reply;
+        }
+
+        TRACE("Writing to device");
+
+        flags = 0;
+        if (request.flags & NBD_CMD_FLAG_FUA) {
+            flags |= BDRV_REQ_FUA;
+        }
+        if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
+            /* FIXME: should this depend on whether the server is set to
+               look for holes? */
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+        ret = blk_write_zeroes(exp->blk,
+                               ((request.from + exp->dev_offset) /
+                                BDRV_SECTOR_SIZE),
+                               request.len / BDRV_SECTOR_SIZE,
+                               flags);
+        if (ret < 0) {
+            LOG("writing to file failed");
+            reply.error = -ret;
+            goto error_reply;
+        }
+
+        /* FIXME: do we need FUA flush here, if we also passed it to
+         * blk_write_zeroes? */
+        if (request.flags & NBD_CMD_FLAG_FUA) {
+            ret = blk_co_flush(exp->blk);
+            if (ret < 0) {
+                LOG("flush failed");
+                reply.error = -ret;
+                goto error_reply;
+            }
+        }
+
+        if (nbd_co_send_reply(req, &reply, 0) < 0) {
+            goto out;
+        }
+        break;
     case NBD_CMD_DISC:
         TRACE("Request type is DISCONNECT");
         goto out;
-- 
2.5.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]