[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter
From: |
Benoît Canet |
Subject: |
Re: [Qemu-devel] [PATCH 1/3] quorum: Add the rewrite-corrupted parameter to quorum. |
Date: |
Tue, 11 Mar 2014 21:14:05 +0100 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
The Tuesday 11 Mar 2014 à 20:58:06 (+0100), Max Reitz wrote :
> On 11.03.2014 17:36, Benoît Canet wrote:
> >On read operations when this parameter is set and some replicas are corrupted
> >while quorum can be reached quorum will proceed to rewrite the correct
> >version
> >of the data to fix the corrupted replicas.
> >
> >This will shine with SSD where the FTL will remap the same block at another
> >place on rewrite.
> >
> >Signed-off-by: Benoit Canet <address@hidden>
> >---
> > block/quorum.c | 97
> > +++++++++++++++++++++++++++++++++++++++++++---
> > qapi-schema.json | 5 ++-
> > tests/qemu-iotests/081 | 14 +++++++
> > tests/qemu-iotests/081.out | 11 +++++-
> > 4 files changed, 119 insertions(+), 8 deletions(-)
> >
> >diff --git a/block/quorum.c b/block/quorum.c
> >index bd997b7..af4fd3c 100644
> >--- a/block/quorum.c
> >+++ b/block/quorum.c
> >@@ -22,6 +22,7 @@
> > #define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
> > #define QUORUM_OPT_BLKVERIFY "blkverify"
> >+#define QUORUM_OPT_REWRITE "rewrite-corrupted"
> > /* This union holds a vote hash value */
> > typedef union QuorumVoteValue {
> >@@ -69,6 +70,9 @@ typedef struct BDRVQuorumState {
> > * It is useful to debug other block drivers by
> > * comparing them with a reference one.
> > */
> >+ bool rewrite_corrupted;/* true if the driver must rewrite-on-read
> >corrupted
> >+ * block if Quorum is reached.
> >+ */
> > } BDRVQuorumState;
> > typedef struct QuorumAIOCB QuorumAIOCB;
> >@@ -104,13 +108,17 @@ struct QuorumAIOCB {
> > int count; /* number of completed AIOCB */
> > int success_count; /* number of successfully completed AIOCB
> > */
> >+ int rewrite_count; /* number of replica to rewrite: count down
> >to
> >+ * zero once writes are fired
> >+ */
> >+
> > QuorumVotes votes;
> > bool is_read;
> > int vote_ret;
> > };
> >-static void quorum_vote(QuorumAIOCB *acb);
> >+static bool quorum_vote(QuorumAIOCB *acb);
> > static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
> > {
> >@@ -182,6 +190,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
> > acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
> > acb->count = 0;
> > acb->success_count = 0;
> >+ acb->rewrite_count = 0;
> > acb->votes.compare = quorum_sha256_compare;
> > QLIST_INIT(&acb->votes.vote_list);
> > acb->is_read = false;
> >@@ -241,11 +250,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB
> >*acb)
> > return false;
> > }
> >+static void quorum_rewrite_aio_cb(void *opaque, int ret)
> >+{
> >+ QuorumAIOCB *acb = opaque;
> >+
> >+ /* one less rewrite to do */
> >+ acb->rewrite_count--;
> >+
> >+ /* wait until all rewrite callback have completed */
>
> *callbacks
>
> >+ if (acb->rewrite_count) {
> >+ return;
> >+ }
> >+
> >+ quorum_aio_finalize(acb);
> >+}
> >+
> > static void quorum_aio_cb(void *opaque, int ret)
> > {
> > QuorumChildRequest *sacb = opaque;
> > QuorumAIOCB *acb = sacb->parent;
> > BDRVQuorumState *s = acb->common.bs->opaque;
> >+ bool rewrite = false;
> > sacb->ret = ret;
> > acb->count++;
> >@@ -262,12 +287,15 @@ static void quorum_aio_cb(void *opaque, int ret)
> > /* Do the vote on read */
> > if (acb->is_read) {
> >- quorum_vote(acb);
> >+ rewrite = quorum_vote(acb);
> > } else {
> > quorum_has_too_much_io_failed(acb);
> > }
> >- quorum_aio_finalize(acb);
> >+ /* if no rewrite is done the code will finish right away */
> >+ if (!rewrite) {
> >+ quorum_aio_finalize(acb);
> >+ }
> > }
> > static void quorum_report_bad_versions(BDRVQuorumState *s,
> >@@ -287,6 +315,43 @@ static void quorum_report_bad_versions(BDRVQuorumState
> >*s,
> > }
> > }
> >+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB
> >*acb,
> >+ QuorumVoteValue *value)
> >+{
> >+ QuorumVoteVersion *version;
> >+ QuorumVoteItem *item;
> >+ int count = 0;
> >+
> >+ /* first count the number of bad versions: done first to avoid
> >concurency
>
> *concurrency
>
> >+ * issues.
> >+ */
> >+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> >+ if (acb->votes.compare(&version->value, value)) {
> >+ continue;
> >+ }
> >+ QLIST_FOREACH(item, &version->items, next) {
> >+ count++;
> >+ }
> >+ }
> >+
> >+ /* quorum_rewrite_aio_cb will count down this to zero */
> >+ acb->rewrite_count = count;
> >+
> >+ /* now fire the correcting rewrites */
> >+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
> >+ if (acb->votes.compare(&version->value, value)) {
> >+ continue;
> >+ }
> >+ QLIST_FOREACH(item, &version->items, next) {
> >+ bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
> >+ acb->nb_sectors, quorum_rewrite_aio_cb, acb);
> >+ }
> >+ }
> >+
> >+ /* return true if any rewrite is done else false */
> >+ return count;
> >+}
> >+
> > static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
> > {
> > int i;
> >@@ -477,16 +542,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
> > return ret;
> > }
> >-static void quorum_vote(QuorumAIOCB *acb)
> >+static bool quorum_vote(QuorumAIOCB *acb)
> > {
> > bool quorum = true;
> >+ bool rewrite = false;
> > int i, j, ret;
> > QuorumVoteValue hash;
> > BDRVQuorumState *s = acb->common.bs->opaque;
> > QuorumVoteVersion *winner;
> > if (quorum_has_too_much_io_failed(acb)) {
> >- return;
> >+ return false;;
>
> Two semicolons here.
>
> > }
> > /* get the index of the first successful read */
> >@@ -514,7 +580,7 @@ static void quorum_vote(QuorumAIOCB *acb)
> > /* Every successful read agrees */
> > if (quorum) {
> > quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
> >- return;
> >+ return false;;
>
> And here.
>
> > }
> > /* compute hashes for each successful read, also store indexes */
> >@@ -547,9 +613,15 @@ static void quorum_vote(QuorumAIOCB *acb)
> > /* some versions are bad print them */
> > quorum_report_bad_versions(s, acb, &winner->value);
> >+ /* corruption correction is actived */
>
> I'd vote for "enabled" rather than "actived". But I like "corruption
> correction". :-)
>
> >+ if (s->rewrite_corrupted) {
> >+ rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
> >+ }
> >+
> > free_exit:
> > /* free lists */
> > quorum_free_vote_list(&acb->votes);
> >+ return rewrite;
> > }
> > static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
> >@@ -709,6 +781,11 @@ static QemuOptsList quorum_runtime_opts = {
> > .type = QEMU_OPT_BOOL,
> > .help = "Trigger block verify mode if set",
> > },
> >+ {
> >+ .name = QUORUM_OPT_REWRITE,
> >+ .type = QEMU_OPT_BOOL,
> >+ .help = "Rewrite corrupted block on read quorum",
> >+ },
> > { /* end of list */ }
> > },
> > };
> >@@ -770,6 +847,14 @@ static int quorum_open(BlockDriverState *bs, QDict
> >*options, int flags,
> > "and using two files with vote_threshold=2\n");
> > }
> >+ s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE,
> >false);
> >+ if (s->rewrite_corrupted && s->is_blkverify) {
> >+ error_setg(&local_err,
> >+ "rewrite-corrupted=on cannot be used with blkverify=on");
> >+ ret = -EINVAL;
> >+ goto exit;
> >+ }
> >+
> > /* allocate the children BlockDriverState array */
> > s->bs = g_new0(BlockDriverState *, s->num_children);
> > opened = g_new0(bool, s->num_children);
> >diff --git a/qapi-schema.json b/qapi-schema.json
> >index 6476d4a..f5a8767 100644
> >--- a/qapi-schema.json
> >+++ b/qapi-schema.json
> >@@ -4542,12 +4542,15 @@
> > #
> > # @vote-threshold: the vote limit under which a read will fail
> > #
> >+# @rewrite-corrupted: #optional rewrite corrupted data when quorum is
> >reached
> >+# (Since 2.1)
> >+#
> > # Since: 2.0
> > ##
> > { 'type': 'BlockdevOptionsQuorum',
> > 'data': { '*blkverify': 'bool',
> > 'children': [ 'BlockdevRef' ],
> >- 'vote-threshold': 'int' } }
> >+ 'vote-threshold': 'int', '*rewrite-corrupted': 'bool' } }
> > ##
> > # @BlockdevOptions
> >diff --git a/tests/qemu-iotests/081 b/tests/qemu-iotests/081
> >index b512d00..bb211d6 100755
> >--- a/tests/qemu-iotests/081
> >+++ b/tests/qemu-iotests/081
> >@@ -95,6 +95,18 @@ echo "== checking quorum correction =="
> > $QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
> > echo
> >+echo "== using quorum rewrite corrupted mode =="
> >+
> >+quorum="$quorum,file.rewrite-corrupted=on"
> >+
> >+$QEMU_IO -c "open -o $quorum" -c "read -P 0x32 0 $size" | _filter_qemu_io
> >+
> >+echo
> >+echo "== checking that quorum has corrected the corrupted file =="
> >+
> >+$QEMU_IO -c "read -P 0x32 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> >+
>
> I'd prefer it if you could put these new test cases below the
> "checking mixed reference/option specification" test, just because
> I'd like that test to output the quorum warning - especially there
> is no other point in the test where we can see that QMP message.
>
> But that's up to you and if you at least fix the double semicolons:
Ok I will do these changes.
Thanks for the review
Best regards
Benoît
>
> Reviewed-by: Max Reitz <address@hidden>
>
> >+echo
> > echo "== checking mixed reference/option specification =="
> > run_qemu -drive "file=$TEST_DIR/2.raw,format=$IMGFMT,if=none,id=drive2"
> > <<EOF
> >@@ -137,6 +149,8 @@ echo
> > echo "== breaking quorum =="
> > $QEMU_IO -c "write -P 0x41 0 $size" "$TEST_DIR/1.raw" | _filter_qemu_io
> >+$QEMU_IO -c "write -P 0x42 0 $size" "$TEST_DIR/2.raw" | _filter_qemu_io
> >+
> > echo
> > echo "== checking that quorum is broken =="
> >diff --git a/tests/qemu-iotests/081.out b/tests/qemu-iotests/081.out
> >index 84aeb0c..2d8b290 100644
> >--- a/tests/qemu-iotests/081.out
> >+++ b/tests/qemu-iotests/081.out
> >@@ -25,12 +25,19 @@ wrote 10485760/10485760 bytes at offset 0
> > read 10485760/10485760 bytes at offset 0
> > 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+== using quorum rewrite corrupted mode ==
> >+read 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+
> >+== checking that quorum has corrected the corrupted file ==
> >+read 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+
> > == checking mixed reference/option specification ==
> > Testing: -drive file=TEST_DIR/2.IMGFMT,format=IMGFMT,if=none,id=drive2
> > QMP_VERSION
> > {"return": {}}
> > {"return": {}}
> >-{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event":
> >"QUORUM_REPORT_BAD", "data": {"node-name": "", "sectors-count": 20480,
> >"sector-num": 0}}
> > read 10485760/10485760 bytes at offset 0
> > 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> > {"return": ""}
> >@@ -43,6 +50,8 @@ read 10485760/10485760 bytes at offset 0
> > == breaking quorum ==
> > wrote 10485760/10485760 bytes at offset 0
> > 10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> >+wrote 10485760/10485760 bytes at offset 0
> >+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
> > == checking that quorum is broken ==
> > qemu-io: can't open device (null): Could not read image for determining
> > its format: Input/output error
>
>