qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v8 1/4] quorum: Add the rewrite-corrupted parame


From: Benoît Canet
Subject: Re: [Qemu-devel] [PATCH v8 1/4] quorum: Add the rewrite-corrupted parameter to quorum.
Date: Tue, 10 Jun 2014 09:25:04 +0200
User-agent: Mutt/1.5.21 (2010-09-15)

The Tuesday 10 Jun 2014 à 09:21:43 (+0200), Benoît Canet wrote :

Oh i again submitted a commit subject ending with a dot I will remove theses.

> On read operations when this parameter is set and some replicas are corrupted
> while quorum can be reached quorum will proceed to rewrite the correct version
> of the data to fix the corrupted replicas.
> 
> This will shine with SSD where the FTL will remap the same block at another
> place on rewrite.
> 
> Signed-off-by: Benoit Canet <address@hidden>
> ---
>  block/quorum.c             | 97 
> +++++++++++++++++++++++++++++++++++++++++++---
>  qapi/block-core.json       |  5 ++-
>  tests/qemu-iotests/081     | 15 ++++++-
>  tests/qemu-iotests/081.out | 10 +++++
>  4 files changed, 119 insertions(+), 8 deletions(-)
> 
> diff --git a/block/quorum.c b/block/quorum.c
> index 426077a..1a7a01e 100644
> --- a/block/quorum.c
> +++ b/block/quorum.c
> @@ -22,6 +22,7 @@
>  
>  #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
>  #define QUORUM_OPT_BLKVERIFY      "blkverify"
> +#define QUORUM_OPT_REWRITE        "rewrite-corrupted"
>  
>  /* This union holds a vote hash value */
>  typedef union QuorumVoteValue {
> @@ -69,6 +70,9 @@ typedef struct BDRVQuorumState {
>                              * It is useful to debug other block drivers by
>                              * comparing them with a reference one.
>                              */
> +    bool rewrite_corrupted;/* true if the driver must rewrite-on-read 
> corrupted
> +                            * block if Quorum is reached.
> +                            */
>  } BDRVQuorumState;
>  
>  typedef struct QuorumAIOCB QuorumAIOCB;
> @@ -104,13 +108,17 @@ struct QuorumAIOCB {
>      int count;                  /* number of completed AIOCB */
>      int success_count;          /* number of successfully completed AIOCB */
>  
> +    int rewrite_count;          /* number of replica to rewrite: count down 
> to
> +                                 * zero once writes are fired
> +                                 */
> +
>      QuorumVotes votes;
>  
>      bool is_read;
>      int vote_ret;
>  };
>  
> -static void quorum_vote(QuorumAIOCB *acb);
> +static bool quorum_vote(QuorumAIOCB *acb);
>  
>  static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
>  {
> @@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
>      acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
>      acb->count = 0;
>      acb->success_count = 0;
> +    acb->rewrite_count = 0;
>      acb->votes.compare = quorum_sha256_compare;
>      QLIST_INIT(&acb->votes.vote_list);
>      acb->is_read = false;
> @@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB 
> *acb)
>      return false;
>  }
>  
> +static void quorum_rewrite_aio_cb(void *opaque, int ret)
> +{
> +    QuorumAIOCB *acb = opaque;
> +
> +    /* one less rewrite to do */
> +    acb->rewrite_count--;
> +
> +    /* wait until all rewrite callbacks have completed */
> +    if (acb->rewrite_count) {
> +        return;
> +    }
> +
> +    quorum_aio_finalize(acb);
> +}
> +
>  static void quorum_aio_cb(void *opaque, int ret)
>  {
>      QuorumChildRequest *sacb = opaque;
>      QuorumAIOCB *acb = sacb->parent;
>      BDRVQuorumState *s = acb->common.bs->opaque;
> +    bool rewrite = false;
>  
>      sacb->ret = ret;
>      acb->count++;
> @@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret)
>  
>      /* Do the vote on read */
>      if (acb->is_read) {
> -        quorum_vote(acb);
> +        rewrite = quorum_vote(acb);
>      } else {
>          quorum_has_too_much_io_failed(acb);
>      }
>  
> -    quorum_aio_finalize(acb);
> +    /* if no rewrite is done the code will finish right away */
> +    if (!rewrite) {
> +        quorum_aio_finalize(acb);
> +    }
>  }
>  
>  static void quorum_report_bad_versions(BDRVQuorumState *s,
> @@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumState 
> *s,
>      }
>  }
>  
> +static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
> +                                        QuorumVoteValue *value)
> +{
> +    QuorumVoteVersion *version;
> +    QuorumVoteItem *item;
> +    int count = 0;
> +
> +    /* first count the number of bad versions: done first to avoid 
> concurrency
> +     * issues.
> +     */
> +    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> +        if (acb->votes.compare(&version->value, value)) {
> +            continue;
> +        }
> +        QLIST_FOREACH(item, &version->items, next) {
> +            count++;
> +        }
> +    }
> +
> +    /* quorum_rewrite_aio_cb will count down this to zero */
> +    acb->rewrite_count = count;
> +
> +    /* now fire the correcting rewrites */
> +    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> +        if (acb->votes.compare(&version->value, value)) {
> +            continue;
> +        }
> +        QLIST_FOREACH(item, &version->items, next) {
> +            bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
> +                            acb->nb_sectors, quorum_rewrite_aio_cb, acb);
> +        }
> +    }
> +
> +    /* return true if any rewrite is done else false */
> +    return count;
> +}
> +
>  static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
>  {
>      int i;
> @@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
>      return ret;
>  }
>  
> -static void quorum_vote(QuorumAIOCB *acb)
> +static bool quorum_vote(QuorumAIOCB *acb)
>  {
>      bool quorum = true;
> +    bool rewrite = false;
>      int i, j, ret;
>      QuorumVoteValue hash;
>      BDRVQuorumState *s = acb->common.bs->opaque;
>      QuorumVoteVersion *winner;
>  
>      if (quorum_has_too_much_io_failed(acb)) {
> -        return;
> +        return false;
>      }
>  
>      /* get the index of the first successful read */
> @@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb)
>      /* Every successful read agrees */
>      if (quorum) {
>          quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
> -        return;
> +        return false;
>      }
>  
>      /* compute hashes for each successful read, also store indexes */
> @@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb)
>      /* some versions are bad print them */
>      quorum_report_bad_versions(s, acb, &winner->value);
>  
> +    /* corruption correction is enabled */
> +    if (s->rewrite_corrupted) {
> +        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
> +    }
> +
>  free_exit:
>      /* free lists */
>      quorum_free_vote_list(&acb->votes);
> +    return rewrite;
>  }
>  
>  static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> @@ -714,6 +786,11 @@ static QemuOptsList quorum_runtime_opts = {
>              .type = QEMU_OPT_BOOL,
>              .help = "Trigger block verify mode if set",
>          },
> +        {
> +            .name = QUORUM_OPT_REWRITE,
> +            .type = QEMU_OPT_BOOL,
> +            .help = "Rewrite corrupted block on read quorum",
> +        },
>          { /* end of list */ }
>      },
>  };
> @@ -775,6 +852,14 @@ static int quorum_open(BlockDriverState *bs, QDict 
> *options, int flags,
>                  "and using two files with vote_threshold=2\n");
>      }
>  
> +    s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, 
> false);
> +    if (s->rewrite_corrupted && s->is_blkverify) {
> +        error_setg(&local_err,
> +                   "rewrite-corrupted=on cannot be used with blkverify=on");
> +        ret = -EINVAL;
> +        goto exit;
> +    }
> +
>      /* allocate the children BlockDriverState array */
>      s->bs = g_new0(BlockDriverState *, s->num_children);
>      opened = g_new0(bool, s->num_children);
> diff --git a/qapi/block-core.json b/qapi/block-core.json
> index 33bd93f..eec881b 100644
> --- a/qapi/block-core.json
> +++ b/qapi/block-core.json
> @@ -1333,12 +1333,15 @@
>  #
>  # @vote-threshold: the vote limit under which a read will fail
>  #
> +# @rewrite-corrupted: #optional rewrite corrupted data when quorum is reached
> +#                     (Since 2.1)
> +#
>  # Since: 2.0
>  ##
>  { 'type': 'BlockdevOptionsQuorum',
>    'data': { '*blkverify': 'bool',
>              'children': [ 'BlockdevRef' ],
> -            'vote-threshold': 'int' } }
> +            'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
>  
>  ##
>  # @BlockdevOptions
> diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
> index b512d00..7ae4be2 100755
> --- a/tests/qemu-iotests/081
> +++ b/tests/qemu-iotests/081
> @@ -134,15 +134,28 @@ run_qemu -drive 
> "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2" <<EOF
>  EOF
>  
>  echo
> +echo "== using quorum rewrite corrupted mode =="
> +
> +quorum="$quorum,file.rewrite-corrupted=on"
> +
> +$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
> +
> +echo
> +echo "== checking that quorum has corrected the corrupted file =="
> +
> +$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> +
> +echo
>  echo "== breaking quorum =="
>  
>  $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
> +$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> +
>  echo
>  echo "== checking that quorum is broken =="
>  
>  $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
>  
> -
>  # success, all done
>  echo "*** done"
>  rm -f $seq.full
> diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
> index 84aeb0c..1629d2a 100644
> --- a/tests/qemu-iotests/081.out
> +++ b/tests/qemu-iotests/081.out
> @@ -40,9 +40,19 @@ read 10485760/10485760 bytes at offset 0
>  {"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": 
> "DEVICE_TRAY_MOVED", "data": {"device": "floppy0", "tray-open": true}}
>  
>  
> +== using quorum rewrite corrupted mode ==
> +read 10485760/10485760 bytes at offset 0
> +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +
> +== checking that quorum has corrected the corrupted file ==
> +read 10485760/10485760 bytes at offset 0
> +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +
>  == breaking quorum ==
>  wrote 10485760/10485760 bytes at offset 0
>  10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> +wrote 10485760/10485760 bytes at offset 0
> +10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
>  
>  == checking that quorum is broken ==
>  qemu-io: can't open device (null): Could not read image for determining its 
> format: Input/output error
> -- 
> 1.9.1
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]