qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 08/12] qcow2: implement lazy refcounts


From: Kevin Wolf
Subject: [Qemu-devel] [PATCH 08/12] qcow2: implement lazy refcounts
Date: Mon, 6 Aug 2012 22:44:47 +0200

From: Stefan Hajnoczi <address@hidden>

Lazy refcounts is a performance optimization for qcow2 that postpones
refcount metadata updates and instead marks the image dirty.  In the
case of crash or power failure the image will be left in a dirty state
and repaired next time it is opened.

Reducing metadata I/O is important for cache=writethrough and
cache=directsync because these modes guarantee that data is on disk
after each write (hence we cannot take advantage of caching updates in
RAM).  Refcount metadata is not needed for guest->file block address
translation and therefore does not need to be on-disk at the time of
write completion - this is the motivation behind the lazy refcount
optimization.

The lazy refcount optimization must be enabled at image creation time:

  qemu-img create -f qcow2 -o compat=1.1,lazy_refcounts=on a.qcow2 10G
  qemu-system-x86_64 -drive if=virtio,file=a.qcow2,cache=writethrough

Update qemu-iotests 031 and 036 since the extension header size changes
when we add feature bit table entries.

Signed-off-by: Stefan Hajnoczi <address@hidden>
Signed-off-by: Kevin Wolf <address@hidden>
---
 block/qcow2-cluster.c      |    5 ++-
 block/qcow2.c              |   73 +++++++++++++++++++++++++++++++++++++++++--
 block/qcow2.h              |   13 ++++++++
 block_int.h                |   26 ++++++++-------
 tests/qemu-iotests/031.out |   12 +++---
 tests/qemu-iotests/036.out |    2 +-
 6 files changed, 107 insertions(+), 24 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index d7e0e19..e179211 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -662,7 +662,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
QCowL2Meta *m)
         qcow2_cache_depends_on_flush(s->l2_table_cache);
     }
 
-    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
+    if (qcow2_need_accurate_refcounts(s)) {
+        qcow2_cache_set_dependency(bs, s->l2_table_cache,
+                                   s->refcount_block_cache);
+    }
     ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
     if (ret < 0) {
         goto err;
diff --git a/block/qcow2.c b/block/qcow2.c
index 7fe1567..fd5e214 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -215,6 +215,41 @@ static void report_unsupported_feature(BlockDriverState 
*bs,
 }
 
 /*
+ * Sets the dirty bit and flushes afterwards if necessary.
+ *
+ * The incompatible_features bit is only set if the image file header was
+ * updated successfully.  Therefore it is not required to check the return
+ * value of this function.
+ */
+static int qcow2_mark_dirty(BlockDriverState *bs)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t val;
+    int ret;
+
+    assert(s->qcow_version >= 3);
+
+    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
+        return 0; /* already dirty */
+    }
+
+    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
+    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
+                      &val, sizeof(val));
+    if (ret < 0) {
+        return ret;
+    }
+    ret = bdrv_flush(bs->file);
+    if (ret < 0) {
+        return ret;
+    }
+
+    /* Only treat image as dirty if the header was updated successfully */
+    s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
+    return 0;
+}
+
+/*
  * Clears the dirty bit and flushes before if necessary.  Only call this
  * function when there are no pending requests, it does not guard against
  * concurrent requests dirtying the image.
@@ -752,6 +787,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState 
*bs,
             goto fail;
         }
 
+        if (l2meta.nb_clusters > 0 &&
+            (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)) {
+            qcow2_mark_dirty(bs);
+        }
+
         cluster_offset = l2meta.cluster_offset;
         assert((cluster_offset & 511) == 0);
 
@@ -994,6 +1034,11 @@ int qcow2_update_header(BlockDriverState *bs)
             .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
             .name = "dirty bit",
         },
+        {
+            .type = QCOW2_FEAT_TYPE_COMPATIBLE,
+            .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+            .name = "lazy refcounts",
+        },
     };
 
     ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
@@ -1176,6 +1221,11 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
         header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
     }
 
+    if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
+        header.compatible_features |=
+            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
+    }
+
     ret = bdrv_pwrite(bs, 0, &header, sizeof(header));
     if (ret < 0) {
         goto out;
@@ -1289,6 +1339,8 @@ static int qcow2_create(const char *filename, 
QEMUOptionParameter *options)
                     options->value.s);
                 return -EINVAL;
             }
+        } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
+            flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
         }
         options++;
     }
@@ -1299,6 +1351,12 @@ static int qcow2_create(const char *filename, 
QEMUOptionParameter *options)
         return -EINVAL;
     }
 
+    if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
+        fprintf(stderr, "Lazy refcounts only supported with compatibility "
+                "level 1.1 and above (use compat=1.1 or greater)\n");
+        return -EINVAL;
+    }
+
     return qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
                          cluster_size, prealloc, options, version);
 }
@@ -1485,10 +1543,12 @@ static coroutine_fn int 
qcow2_co_flush_to_os(BlockDriverState *bs)
         return ret;
     }
 
-    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-    if (ret < 0) {
-        qemu_co_mutex_unlock(&s->lock);
-        return ret;
+    if (qcow2_need_accurate_refcounts(s)) {
+        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+        if (ret < 0) {
+            qemu_co_mutex_unlock(&s->lock);
+            return ret;
+        }
     }
     qemu_co_mutex_unlock(&s->lock);
 
@@ -1603,6 +1663,11 @@ static QEMUOptionParameter qcow2_create_options[] = {
         .type = OPT_STRING,
         .help = "Preallocation mode (allowed values: off, metadata)"
     },
+    {
+        .name = BLOCK_OPT_LAZY_REFCOUNTS,
+        .type = OPT_FLAG,
+        .help = "Postpone refcount updates",
+    },
     { NULL }
 };
 
diff --git a/block/qcow2.h b/block/qcow2.h
index b5fefc0..b4eb654 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -118,6 +118,14 @@ enum {
     QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY,
 };
 
+/* Compatible feature bits */
+enum {
+    QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
+    QCOW2_COMPAT_LAZY_REFCOUNTS       = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+
+    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS,
+};
+
 typedef struct Qcow2Feature {
     uint8_t type;
     uint8_t bit;
@@ -245,6 +253,11 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry)
     }
 }
 
+/* Check whether refcounts are eager or lazy */
+static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
+{
+    return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
+}
 
 // FIXME Need qcow2_ prefix to global functions
 
diff --git a/block_int.h b/block_int.h
index d72317f..6c1d9ca 100644
--- a/block_int.h
+++ b/block_int.h
@@ -31,8 +31,9 @@
 #include "qemu-timer.h"
 #include "qapi-types.h"
 
-#define BLOCK_FLAG_ENCRYPT     1
-#define BLOCK_FLAG_COMPAT6     4
+#define BLOCK_FLAG_ENCRYPT          1
+#define BLOCK_FLAG_COMPAT6          4
+#define BLOCK_FLAG_LAZY_REFCOUNTS   8
 
 #define BLOCK_IO_LIMIT_READ     0
 #define BLOCK_IO_LIMIT_WRITE    1
@@ -41,16 +42,17 @@
 #define BLOCK_IO_SLICE_TIME     100000000
 #define NANOSECONDS_PER_SECOND  1000000000.0
 
-#define BLOCK_OPT_SIZE          "size"
-#define BLOCK_OPT_ENCRYPT       "encryption"
-#define BLOCK_OPT_COMPAT6       "compat6"
-#define BLOCK_OPT_BACKING_FILE  "backing_file"
-#define BLOCK_OPT_BACKING_FMT   "backing_fmt"
-#define BLOCK_OPT_CLUSTER_SIZE  "cluster_size"
-#define BLOCK_OPT_TABLE_SIZE    "table_size"
-#define BLOCK_OPT_PREALLOC      "preallocation"
-#define BLOCK_OPT_SUBFMT        "subformat"
-#define BLOCK_OPT_COMPAT_LEVEL  "compat"
+#define BLOCK_OPT_SIZE              "size"
+#define BLOCK_OPT_ENCRYPT           "encryption"
+#define BLOCK_OPT_COMPAT6           "compat6"
+#define BLOCK_OPT_BACKING_FILE      "backing_file"
+#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
+#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
+#define BLOCK_OPT_TABLE_SIZE        "table_size"
+#define BLOCK_OPT_PREALLOC          "preallocation"
+#define BLOCK_OPT_SUBFMT            "subformat"
+#define BLOCK_OPT_COMPAT_LEVEL      "compat"
+#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
 
 typedef struct BdrvTrackedRequest BdrvTrackedRequest;
 
diff --git a/tests/qemu-iotests/031.out b/tests/qemu-iotests/031.out
index 297b458..796c993 100644
--- a/tests/qemu-iotests/031.out
+++ b/tests/qemu-iotests/031.out
@@ -54,7 +54,7 @@ header_length             72
 
 Header extension:
 magic                     0x6803f857
-length                    48
+length                    96
 data                      <binary>
 
 Header extension:
@@ -68,7 +68,7 @@ No errors were found on the image.
 
 magic                     0x514649fb
 version                   2
-backing_file_offset       0xc8
+backing_file_offset       0xf8
 backing_file_size         0x17
 cluster_bits              16
 size                      67108864
@@ -92,7 +92,7 @@ data                      'host_device'
 
 Header extension:
 magic                     0x6803f857
-length                    48
+length                    96
 data                      <binary>
 
 Header extension:
@@ -155,7 +155,7 @@ header_length             104
 
 Header extension:
 magic                     0x6803f857
-length                    48
+length                    96
 data                      <binary>
 
 Header extension:
@@ -169,7 +169,7 @@ No errors were found on the image.
 
 magic                     0x514649fb
 version                   3
-backing_file_offset       0xe8
+backing_file_offset       0x118
 backing_file_size         0x17
 cluster_bits              16
 size                      67108864
@@ -193,7 +193,7 @@ data                      'host_device'
 
 Header extension:
 magic                     0x6803f857
-length                    48
+length                    96
 data                      <binary>
 
 Header extension:
diff --git a/tests/qemu-iotests/036.out b/tests/qemu-iotests/036.out
index ca0fda1..063ca22 100644
--- a/tests/qemu-iotests/036.out
+++ b/tests/qemu-iotests/036.out
@@ -46,7 +46,7 @@ header_length             104
 
 Header extension:
 magic                     0x6803f857
-length                    48
+length                    96
 data                      <binary>
 
 *** done
-- 
1.7.6.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]