qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 8/9] mirror: use synch scheme for drive mirror


From: Denis V. Lunev
Subject: [Qemu-devel] [PATCH 8/9] mirror: use synch scheme for drive mirror
Date: Tue, 14 Jun 2016 18:25:15 +0300

Block commit of the active image to the backing store on a slow disk
could never end. For example, with a guest running the following loop
inside:
    while true; do
        dd bs=1k count=1 if=/dev/zero of=x
    done
running on top of slow storage could not complete the operation within a
reasonable amount of time:
    virsh blockcommit rhel7 sda --active --shallow
    virsh qemu-monitor-event
    virsh qemu-monitor-command rhel7 \
        '{"execute":"block-job-complete",\
          "arguments":{"device":"drive-scsi0-0-0-0"} }'
    virsh qemu-monitor-event
Completion event is never received.

This problem could not be fixed easily with the current architecture. We
should either prohibit guest writes (which make the dirty bitmap dirty) or
switch to a synchronous scheme.

This patch implements the latter. It adds a mirror_before_write_notify
callback. With it, all data written by the guest is synchronously
written to the mirror target. However, this only partially solves the
problem: we should also switch from bdrv_dirty_bitmap to a simple hbitmap,
which will be done in the next patch.

Signed-off-by: Denis V. Lunev <address@hidden>
Reviewed-by: Vladimir Sementsov-Ogievskiy <address@hidden>
CC: Stefan Hajnoczi <address@hidden>
CC: Fam Zheng <address@hidden>
CC: Kevin Wolf <address@hidden>
CC: Max Reitz <address@hidden>
CC: Jeff Cody <address@hidden>
CC: Eric Blake <address@hidden>
---
 block/mirror.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/block/mirror.c b/block/mirror.c
index 7471211..086256c 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -58,6 +58,9 @@ typedef struct MirrorBlockJob {
     QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
     int buf_free_count;
 
+    NotifierWithReturn before_write;
+    CoQueue dependent_writes;
+
     unsigned long *in_flight_bitmap;
     int in_flight;
     int sectors_in_flight;
@@ -125,6 +128,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
     g_free(op->buf);
     g_free(op);
 
+    qemu_co_queue_restart_all(&s->dependent_writes);
     if (s->waiting_for_io) {
         qemu_coroutine_enter(s->common.co, NULL);
     }
@@ -511,6 +515,74 @@ static void mirror_exit(BlockJob *job, void *opaque)
     bdrv_unref(src);
 }
 
+static int coroutine_fn mirror_before_write_notify(
+        NotifierWithReturn *notifier, void *opaque)
+{
+    MirrorBlockJob *s = container_of(notifier, MirrorBlockJob, before_write);
+    BdrvTrackedRequest *req = opaque;
+    MirrorOp *op;
+    int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+    int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
+    int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
+    int64_t end_sector = sector_num + nb_sectors;
+    int64_t aligned_start, aligned_end;
+
+    if (req->type != BDRV_TRACKED_DISCARD && req->type != BDRV_TRACKED_WRITE) {
+        /* this is not discard and write, we do not care */
+        return 0;
+    }
+
+    while (1) {
+        bool waited = false;
+        int64_t sn;
+
+        for (sn = sector_num; sn < end_sector; sn += sectors_per_chunk) {
+            int64_t chunk = sn / sectors_per_chunk;
+            if (test_bit(chunk, s->in_flight_bitmap)) {
+                trace_mirror_yield_in_flight(s, chunk, s->in_flight);
+                qemu_co_queue_wait(&s->dependent_writes);
+                waited = true;
+            }
+        }
+
+        if (!waited) {
+            break;
+        }
+    }
+
+    aligned_start = QEMU_ALIGN_UP(sector_num, sectors_per_chunk);
+    aligned_end = QEMU_ALIGN_DOWN(sector_num + nb_sectors, sectors_per_chunk);
+    if (aligned_end > aligned_start) {
+        bdrv_reset_dirty_bitmap(s->dirty_bitmap, aligned_start,
+                                aligned_end - aligned_start);
+    }
+
+    if (req->type == BDRV_TRACKED_DISCARD) {
+        mirror_do_zero_or_discard(s, sector_num, nb_sectors, true);
+        return 0;
+    }
+
+    s->in_flight++;
+    s->sectors_in_flight += nb_sectors;
+
+    /* Allocate a MirrorOp that is used as an AIO callback.  */
+    op = g_new(MirrorOp, 1);
+    op->s = s;
+    op->sector_num = sector_num;
+    op->nb_sectors = nb_sectors;
+    op->buf = qemu_try_blockalign(blk_bs(s->target), req->qiov->size);
+    if (op->buf == NULL) {
+        g_free(op);
+        return -ENOMEM;
+    }
+    qemu_iovec_init(&op->qiov, req->qiov->niov);
+    qemu_iovec_clone(&op->qiov, req->qiov, op->buf);
+
+    blk_aio_pwritev(s->target, req->offset, &op->qiov, 0,
+                    mirror_write_complete, op);
+    return 0;
+}
+
 static int mirror_dirty_init(MirrorBlockJob *s)
 {
     int64_t sector_num, end;
@@ -764,6 +836,8 @@ immediate_exit:
         mirror_drain(s);
     }
 
+    notifier_with_return_remove(&s->before_write);
+
     assert(s->in_flight == 0);
     qemu_vfree(s->buf);
     g_free(s->cow_bitmap);
@@ -905,6 +979,10 @@ static void mirror_start_job(BlockDriverState *bs, 
BlockDriverState *target,
         return;
     }
 
+    qemu_co_queue_init(&s->dependent_writes);
+    s->before_write.notify = mirror_before_write_notify;
+    bdrv_add_before_write_notifier(bs, &s->before_write);
+
     bdrv_op_block_all(target, s->common.blocker);
 
     s->common.co = qemu_coroutine_create(mirror_run);
-- 
2.5.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]