qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to


From: Max Reitz
Subject: Re: [Qemu-devel] [PATCH 2/3] block: Add drive-mirror-replace command to repair quorum files.
Date: Tue, 11 Mar 2014 22:00:29 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.3.0

On 11.03.2014 17:36, Benoît Canet wrote:
When a quorum file is totally destroyed (broken filer) the user can start a

*file

drive-mirror job on the quorum block backend and then replace the broken
quorum file with drive-mirror-replace given it has a node-name.

Signed-off-by: Benoit Canet <address@hidden>
---
  block.c                   |   6 +--
  block/mirror.c            | 115 ++++++++++++++++++++++++++++++++++++++++++++--
  blockdev.c                |  27 +++++++++++
  include/block/block.h     |   3 ++
  include/block/block_int.h |  15 ++++++
  qapi-schema.json          |  33 +++++++++++++
  qmp-commands.hx           |   5 ++
  trace-events              |   1 +
  8 files changed, 199 insertions(+), 6 deletions(-)

diff --git a/block.c b/block.c
index 9f2fe85..2354e5b 100644
--- a/block.c
+++ b/block.c
@@ -787,9 +787,9 @@ static int bdrv_open_flags(BlockDriverState *bs, int flags)
      return open_flags;
  }
-static int bdrv_assign_node_name(BlockDriverState *bs,
-                                 const char *node_name,
-                                 Error **errp)
+int bdrv_assign_node_name(BlockDriverState *bs,
+                          const char *node_name,
+                          Error **errp)
  {

Before this patch, there was only a single caller to this function, and it (obviously, due to the "static") resided in block.c as well (it was part of bdrv_open_common(), to be precise). This caller used this function to assign a node name to a previously unnamed BDS. With this patch, there is a new caller (mirror_activate_replace_target()) and apparently, the BDS it uses this function on is unnamed as well (the call to bdrv_open_backing_file() in mirror_complete() could have named it, but there are not options given, thus, it will remain unnamed).

The point why I'm bringing this up is that bdrv_assign_node_name() always adds the BDS to the graph_bdrv_states list if a new name is given. Therefore, if the BDS was already named, it will be twice in that list. This is now not an issue, as the BDS will have been unnamed in any case before bdrv_assign_node_name() was called. I'd however prefer some assertion in that latter function that the BDS will not be added twice to the list; I guess a simple assertion that it is unnamed (bs->node_name[0] == '\0') will suffice.

      if (!node_name) {
          return 0;
diff --git a/block/mirror.c b/block/mirror.c
index 6dc84e8..8133ca5 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -49,6 +49,15 @@ typedef struct MirrorBlockJob {
      unsigned long *in_flight_bitmap;
      int in_flight;
      int ret;
+    /* these four fields are used by drive-mirror-replace */
+    /* Must the code replace a target with the new mirror */

Hm, probably better "The code must/has to/should replace a target with the new mirror".

+    bool must_replace;
+    /* The block BDS to replace */
+    BlockDriverState *to_replace;
+    /* the node-name of the new mirror BDS */
+    char *new_node_name;
+    /* Used to block operations on the drive-mirror-replace target. */
+    Error *replace_blocker;
  } MirrorBlockJob;
typedef struct MirrorOp {
@@ -299,6 +308,7 @@ static void coroutine_fn mirror_run(void *opaque)
      MirrorBlockJob *s = opaque;
      BlockDriverState *bs = s->common.bs;
      int64_t sector_num, end, sectors_per_chunk, length;
+    BlockDriverState *to_replace;
      uint64_t last_pause_ns;
      BlockDriverInfo bdi;
      char backing_filename[1024];
@@ -477,11 +487,17 @@ immediate_exit:
      g_free(s->in_flight_bitmap);
      bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
      bdrv_iostatus_disable(s->target);
+    /* Here we handle the drive-mirror-replace QMP command */
+    if (s->must_replace) {
+        to_replace = s->to_replace;
+    } else {
+        to_replace = s->common.bs;
+    }
      if (s->should_complete && ret == 0) {
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
-            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
          }
-        bdrv_swap(s->target, s->common.bs);
+        bdrv_swap(s->target, to_replace);
          if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
              /* drop the bs loop chain formed by the swap: break the loop then
               * trigger the unref from the top one */
@@ -491,6 +507,11 @@ immediate_exit:
              bdrv_unref(p);
          }
      }
+    if (s->must_replace) {
+        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+        error_free(s->replace_blocker);
+        bdrv_unref(s->to_replace);
+    }
      bdrv_unref(s->target);
      block_job_completed(&s->common, ret);
  }
@@ -513,6 +534,87 @@ static void mirror_iostatus_reset(BlockJob *job)
      bdrv_iostatus_reset(s->target);
  }
+bool mirror_set_replace_target(BlockJob *job, const char *reference,
+                               bool has_new_node_name,
+                               const char *new_node_name, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+    BlockDriverState *to_replace;
+
+    /* We don't want to give too much power to the user as could result on

*as this could result in

+     * BlockDriverState loops if used with something other than sync=full.
+     */
+    if (s->is_none_mode || s->base) {
+        error_setg(errp, "Can only be used on a mirror done with sync=full");
+        return false;
+    }
+
+    /* check target reference is not an empty string */

*check that the target reference

+    if (!reference[0]) {
+        error_setg(errp, "target-reference is an empty string");
+        return false;
+    }
+
+    /* Get the to replace block driver state */

It's correct, but I find it hard to understand at the first read. I think "Get the block driver state to be replaced" would be easier.

+    to_replace = bdrv_lookup_bs(reference, reference, errp);
+    if (!to_replace) {
+        return false;
+    }
+
+    /* If the BDS to replace is a regular node we need a new node name */

Likewise, s/to replace/to be replaced/?

+    if (to_replace->node_name[0] && !has_new_node_name) {
+        error_setg(errp, "A new-node-name must be provided");
+        return false;
+    }
+
+    /* Can only replace something else than the source of the mirror */
+    if (to_replace == job->bs) {
+        error_setg(errp, "Cannot replace the mirror source");
+        return false;
+    }
+
+    /* are this bs replace operation blocked */

s/are/is/

+    if (bdrv_op_is_blocked(to_replace, BLOCK_OP_TYPE_REPLACE, errp)) {
+        return false;
+    }
+
+    /* save usefull infos for later */

*useful

+    s->to_replace = to_replace;
+    assert(has_new_node_name);
+    s->new_node_name = g_strdup(new_node_name);
+
+    return true;
+}
+
+static void mirror_activate_replace_target(BlockJob *job, Error **errp)
+{
+    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+    /* Set the new node name if the BDS to replace is a regular node
+     * of the graph.
+     */
+    if (s->to_replace->node_name[0]) {
+        assert(s->new_node_name);
+        bdrv_assign_node_name(s->target, s->new_node_name, errp);
+        g_free(s->new_node_name);
+    }

Is s->new_node_name leaked if !s->to_replace->node_name[0]?

+
+    if (error_is_set(errp)) {
+        s->to_replace = NULL;
+        return;
+    }
+
+    /* block all operations on the target bs */
+    error_setg(&s->replace_blocker,
+               "block device is in use by drive-mirror-replace");
+    bdrv_op_block_all(s->to_replace, s->replace_blocker);
+
+    bdrv_ref(s->to_replace);
+
+    /* activate the replacement operation */
+    s->must_replace = true;
+}
+
  static void mirror_complete(BlockJob *job, Error **errp)
  {
      MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
@@ -529,6 +631,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
          return;
      }
+ /* drive-mirror-replace is being called on this job so activate the
+     * replacement target
+     */
+    if (s->to_replace) {
+        mirror_activate_replace_target(job, errp);
+    }
+
      s->should_complete = true;
      block_job_resume(job);
  }
diff --git a/blockdev.c b/blockdev.c
index 8a6ae0a..901a05d 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2365,6 +2365,33 @@ void qmp_block_job_complete(const char *device, Error 
**errp)
      block_job_complete(job, errp);
  }
+void qmp_drive_mirror_replace(const char *device, const char *target_reference,
+                              bool has_new_node_name,
+                              const char *new_node_name,
+                              Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    if (!job->driver || job->driver->job_type != BLOCK_JOB_TYPE_MIRROR) {
+        error_setg(errp, "Can only be used on a drive-mirror block job");
+        return;
+    }
+
+    if (!mirror_set_replace_target(job, target_reference, has_new_node_name,
+                                   new_node_name, errp)) {
+        return;
+    }
+
+    trace_qmp_drive_mirror_replace(job, target_reference,
+                                   has_new_node_name ? new_node_name : "");
+    block_job_complete(job, errp);
+}
+
  void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
  {
      QmpOutputVisitor *ov = qmp_output_visitor_new();
diff --git a/include/block/block.h b/include/block/block.h
index ee1582d..a9d6ead 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -171,6 +171,7 @@ typedef enum BlockOpType {
      BLOCK_OP_TYPE_MIRROR,
      BLOCK_OP_TYPE_RESIZE,
      BLOCK_OP_TYPE_STREAM,
+    BLOCK_OP_TYPE_REPLACE,
      BLOCK_OP_TYPE_MAX,
  } BlockOpType;
@@ -209,6 +210,8 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename,
                      QDict *options, const char *bdref_key, int flags,
                      bool allow_none, Error **errp);
  void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
+int bdrv_assign_node_name(BlockDriverState *bs, const char *node_name,
+                          Error **errp);
  int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error 
**errp);
  int bdrv_open(BlockDriverState **pbs, const char *filename,
                const char *reference, QDict *options, int flags,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1f4f78b..ea281c8 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -486,6 +486,21 @@ void mirror_start(BlockDriverState *bs, BlockDriverState 
*target,
                    void *opaque, Error **errp);
/*
+ * mirror_set_replace_target:
+ * @job: An active mirroring block job started with sync=full.
+ * @reference: id or node-name of the BDS to replace when the mirror is done.
+ * @has_new_node_name: Set to true if new_node_name if provided
+ * @new_node_name: The optional new node name of the new mirror.
+ * @errp: Error object.
+ *
+ * Prepare a mirroring operation to replace a BDS pointed to by reference with
+ * the new mirror.
+ */
+bool mirror_set_replace_target(BlockJob *job, const char *reference,
+                               bool has_new_node_name,
+                               const char *new_node_name, Error **errp);
+
+/*
   * backup_start:
   * @bs: Block device to operate on.
   * @target: Block device to write to.
diff --git a/qapi-schema.json b/qapi-schema.json
index f5a8767..33c5ab1 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -2716,6 +2716,39 @@
  { 'command': 'block-job-complete', 'data': { 'device': 'str' } }
##
+# @drive-mirror-replace:
+#
+# Manually trigger completion of an active background drive-mirror operation
+# and replace the target reference with the new mirror.
+# This switch the device to write to the target path only.

*switches

+# The ability to complete is signaled with a BLOCK_JOB_READY event.
+#
+# This command completes an active drive-mirror background operation
+# synchronously and replace the target reference with the mirror.

*replaces

Max

+# The ordering of this command's return with the BLOCK_JOB_COMPLETED event
+# is not defined.  Note that if an I/O error occurs during the processing of
+# this command: 1) the command itself will fail; 2) the error will be processed
+# according to the rerror/werror arguments that were specified when starting
+# the operation.
+#
+# A cancelled or paused drive-mirror job cannot be completed.
+#
+# @device:           the device name
+# @target-reference: the id or node name of the block driver state to replace
+# @new-node-name:    #optional set the node-name of the new block driver state
+#                    needed the target reference point to a regular node of the
+#                    graph
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 2.1
+##
+{ 'command': 'drive-mirror-replace',
+  'data': { 'device': 'str', 'target-reference': 'str',
+            '*new-node-name': 'str' } }
+
+##
  # @ObjectTypeInfo:
  #
  # This structure describes a search result from @qom-list-types
diff --git a/qmp-commands.hx b/qmp-commands.hx
index dd336f7..3b5b382 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -1150,6 +1150,11 @@ EQMP
          .mhandler.cmd_new = qmp_marshal_input_block_job_complete,
      },
      {
+        .name       = "drive-mirror-replace",
+        .args_type  = "device:B,target-reference:s,new-node-name:s?",
+        .mhandler.cmd_new = qmp_marshal_input_drive_mirror_replace,
+    },
+    {
          .name       = "transaction",
          .args_type  = "actions:q",
          .mhandler.cmd_new = qmp_marshal_input_transaction,
diff --git a/trace-events b/trace-events
index aec4202..5282904 100644
--- a/trace-events
+++ b/trace-events
@@ -103,6 +103,7 @@ qmp_block_job_cancel(void *job) "job %p"
  qmp_block_job_pause(void *job) "job %p"
  qmp_block_job_resume(void *job) "job %p"
  qmp_block_job_complete(void *job) "job %p"
+qmp_drive_mirror_replace(void *job, const char *target_reference, const char 
*new_node_name) "job %p target_reference %s new_node_name %s"
  block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
  qmp_block_stream(void *bs, void *job) "bs %p job %p"




reply via email to

[Prev in Thread] Current Thread [Next in Thread]