From: Kevin Wolf
Subject: [Qemu-devel] [RFC PATCH 16/16] [BROKEN] qcow2: Overwrite COW and allocate new cluster at the same time
Date: Tue, 18 Sep 2012 13:40:42 +0200

Cancelling COW when it is overwritten by a subsequent guest write
request was a good start; however, in practice we don't gain any
performance yet. The second write request is split in two parts: the
first one covers the overwritten COW area, and the second one
allocates another cluster.

We can do both at the same time, and then we actually do gain
performance (the iozone initial writer test goes up from 22 to 35 MB/s).
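
To illustrate the idea, here is a minimal standalone sketch (not QEMU
code: Meta and process_one() are simplified stand-ins for QCowL2Meta
and process_l2meta(), and the offsets are made up). A single guest
write that both overwrites an in-flight COW area and allocates a new
cluster produces a chain of allocation metadata, and each element of
the chain is processed in turn:

    #include <stdio.h>

    /* Simplified stand-in for QCowL2Meta: one element per allocation
     * produced for a single guest write request. */
    typedef struct Meta {
        unsigned long alloc_offset;  /* host offset of the allocated clusters */
        int nb_clusters;             /* number of clusters in this allocation */
        struct Meta *next;           /* further allocations of the same write */
    } Meta;

    /* Stand-in for spawning process_l2meta for one allocation. */
    static void process_one(Meta *m)
    {
        printf("update L2 entries for %d cluster(s) at host offset %#lx\n",
               m->nb_clusters, m->alloc_offset);
    }

    int main(void)
    {
        /* Example: the part reusing the in-flight COW allocation, plus a
         * newly allocated cluster, chained via ->next (offsets made up). */
        Meta new_cluster = { 0x60000, 1, NULL };
        Meta cow_part    = { 0x50000, 1, &new_cluster };

        for (Meta *m = &cow_part; m != NULL; m = m->next) {
            process_one(m);
        }
        return 0;
    }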

Signed-off-by: Kevin Wolf <address@hidden>
---

This patch is not correct at all and can potentially corrupt images;
it's ugly, too. But it works well enough to get an idea of the gains to
expect, so I decided to include it here anyway.
---
 block/qcow2-cluster.c |   17 ++++++++++++-----
 block/qcow2.c         |   29 ++++++++++++++++++-----------
 block/qcow2.h         |    1 +
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index ff22992..39ef7b0 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -888,7 +888,6 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
                 uint64_t subcluster_offset;
                 int nb_sectors;
 
-                *nb_clusters = 1;
                 subcluster_offset = offset_into_cluster(s, guest_offset);
                 nb_sectors = (subcluster_offset + bytes) >> BDRV_SECTOR_BITS;
 
@@ -1032,7 +1031,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     BDRVQcowState *s = bs->opaque;
     int l2_index, ret, sectors;
     uint64_t *l2_table;
-    unsigned int nb_clusters, keep_clusters;
+    unsigned int nb_clusters, keep_clusters = 0;
     uint64_t cluster_offset = 0;
 
     trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset,
@@ -1079,17 +1078,21 @@ again:
     } else if (ret < 0) {
         return ret;
     } else if (*m) {
+        /* FIXME There could be more dependencies */
         keep_clusters = 1;
-        nb_clusters = 0;
-        goto done;
+        nb_clusters -= keep_clusters;
     }
 
+
     /* Find L2 entry for the first involved cluster */
     ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
     if (ret < 0) {
         return ret;
     }
 
+    if (cluster_offset != 0) {
+        goto do_alloc;
+    }
     cluster_offset = be64_to_cpu(l2_table[l2_index]);
 
     /*
@@ -1111,6 +1114,7 @@ again:
         cluster_offset = 0;
     }
 
+do_alloc:
     if (nb_clusters > 0) {
         /* For the moment, overwrite compressed clusters one by one */
         uint64_t entry = be64_to_cpu(l2_table[l2_index + keep_clusters]);
@@ -1177,6 +1181,7 @@ again:
                                 << (s->cluster_bits - BDRV_SECTOR_BITS);
             int alloc_n_start = keep_clusters == 0 ? n_start : 0;
             int nb_sectors = MIN(requested_sectors, avail_sectors);
+            QCowL2Meta *old_m = *m;
 
             if (keep_clusters == 0) {
                 cluster_offset = alloc_cluster_offset;
@@ -1185,6 +1190,8 @@ again:
             *m = g_malloc0(sizeof(**m));
 
             **m = (QCowL2Meta) {
+                .next           = old_m,
+
                 .alloc_offset   = alloc_cluster_offset,
                 .offset         = alloc_offset & ~(s->cluster_size - 1),
                 .nb_clusters    = nb_clusters,
@@ -1198,6 +1205,7 @@ again:
                     .offset     = nb_sectors * BDRV_SECTOR_SIZE,
                     .nb_sectors = avail_sectors - nb_sectors,
                 },
+
             };
             qemu_co_queue_init(&(*m)->dependent_requests);
             qemu_co_rwlock_init(&(*m)->l2_writeback_lock);
@@ -1206,7 +1214,6 @@ again:
     }
 
     /* Some cleanup work */
-done:
     sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9);
     if (sectors > n_end) {
         sectors = n_end;
diff --git a/block/qcow2.c b/block/qcow2.c
index abc3de3..e6fa616 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -959,19 +959,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         }
 
         if (l2meta != NULL) {
-            ProcessL2Meta p = {
-                .bs = bs,
-                .m  = l2meta,
-            };
-
             qemu_co_mutex_unlock(&s->lock);
-            qemu_co_rwlock_rdlock(&s->l2meta_flush);
 
-            l2meta->is_written = true;
-            l2meta->co = qemu_coroutine_create(process_l2meta);
-            qemu_coroutine_enter(l2meta->co, &p);
+            while (l2meta != NULL) {
+                ProcessL2Meta p = {
+                    .bs = bs,
+                    .m  = l2meta,
+                };
+
+                qemu_co_rwlock_rdlock(&s->l2meta_flush);
+                l2meta->is_written = true;
+                l2meta->co = qemu_coroutine_create(process_l2meta);
+                qemu_coroutine_enter(l2meta->co, &p);
+                l2meta = l2meta->next;
+            }
 
-            l2meta = NULL;
             qemu_co_mutex_lock(&s->lock);
         }
 
@@ -985,12 +987,17 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 fail:
     qemu_co_mutex_unlock(&s->lock);
 
-    if (l2meta != NULL) {
+    while (l2meta != NULL) {
+        QCowL2Meta *next;
+
         if (l2meta->nb_clusters != 0) {
             QLIST_REMOVE(l2meta, next_in_flight);
         }
         run_dependent_requests(s, l2meta);
+
+        next = l2meta->next;
         g_free(l2meta);
+        l2meta = next;
     }
 
     qemu_iovec_destroy(&hd_qiov);
diff --git a/block/qcow2.h b/block/qcow2.h
index 7ed082b..9ebb5f9 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -311,6 +311,7 @@ typedef struct QCowL2Meta
      *   data even though the L2 table is not updated yet.
      */
     struct QCowL2Meta *parent;
+    struct QCowL2Meta *next;
 
     QLIST_ENTRY(QCowL2Meta) next_in_flight;
 } QCowL2Meta;
-- 
1.7.6.5



