qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v2 4/5] qcow2: More complete consistency check


From: Max Reitz
Subject: [Qemu-devel] [PATCH v2 4/5] qcow2: More complete consistency check
Date: Wed, 28 Aug 2013 16:55:05 +0200

The qcow2_check_refcounts function has been extended to be able to fix
OFLAG_COPIED errors and multiple references on refcount blocks.

If no corruptions remain after an image repair (and no errors have been
encountered), clear the corrupt flag in qcow2_check.

Signed-off-by: Max Reitz <address@hidden>
---
 block/qcow2-cluster.c  |   4 +-
 block/qcow2-refcount.c | 249 ++++++++++++++++++++++++++++++++++++++++++-------
 block/qcow2.c          |   6 +-
 block/qcow2.h          |   1 +
 include/block/block.h  |   1 +
 5 files changed, 222 insertions(+), 39 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 7c248aa..2d5aa92 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -145,7 +145,7 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
  * and we really don't want bdrv_pread to perform a read-modify-write)
  */
 #define L1_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l1_entry(BlockDriverState *bs, int l1_index)
+int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t buf[L1_ENTRIES_PER_SECTOR];
@@ -254,7 +254,7 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
     /* update the L1 entry */
     trace_qcow2_l2_allocate_write_l1(bs, l1_index);
     s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
-    ret = write_l1_entry(bs, l1_index);
+    ret = qcow2_write_l1_entry(bs, l1_index);
     if (ret < 0) {
         goto fail;
     }
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index d06a9df..333d10b 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1034,7 +1034,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
     BDRVQcowState *s = bs->opaque;
     uint64_t *l2_table, l2_entry;
     uint64_t next_contiguous_offset = 0;
-    int i, l2_size, nb_csectors, refcount;
+    int i, l2_size, nb_csectors;
 
     /* Read L2 table from disk */
     l2_size = s->l2_size * sizeof(uint64_t);
@@ -1086,23 +1086,8 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
 
         case QCOW2_CLUSTER_NORMAL:
         {
-            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
             uint64_t offset = l2_entry & L2E_OFFSET_MASK;
 
-            if (flags & CHECK_OFLAG_COPIED) {
-                refcount = get_refcount(bs, offset >> s->cluster_bits);
-                if (refcount < 0) {
-                    fprintf(stderr, "Can't get refcount for offset %"
-                        PRIx64 ": %s\n", l2_entry, strerror(-refcount));
-                    goto fail;
-                }
-                if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
-                    fprintf(stderr, "ERROR OFLAG_COPIED: offset=%"
-                        PRIx64 " refcount=%d\n", l2_entry, refcount);
-                    res->corruptions++;
-                }
-            }
-
             if (flags & CHECK_FRAG_INFO) {
                 res->bfi.allocated_clusters++;
                 if (next_contiguous_offset &&
@@ -1159,7 +1144,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
 {
     BDRVQcowState *s = bs->opaque;
     uint64_t *l1_table, l2_offset, l1_size2;
-    int i, refcount, ret;
+    int i, ret;
 
     l1_size2 = l1_size * sizeof(uint64_t);
 
@@ -1183,22 +1168,6 @@ static int check_refcounts_l1(BlockDriverState *bs,
     for(i = 0; i < l1_size; i++) {
         l2_offset = l1_table[i];
         if (l2_offset) {
-            /* QCOW_OFLAG_COPIED must be set iff refcount == 1 */
-            if (flags & CHECK_OFLAG_COPIED) {
-                refcount = get_refcount(bs, (l2_offset & ~QCOW_OFLAG_COPIED)
-                    >> s->cluster_bits);
-                if (refcount < 0) {
-                    fprintf(stderr, "Can't get refcount for l2_offset %"
-                        PRIx64 ": %s\n", l2_offset, strerror(-refcount));
-                    goto fail;
-                }
-                if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED) != 0)) {
-                    fprintf(stderr, "ERROR OFLAG_COPIED: l2_offset=%" PRIx64
-                        " refcount=%d\n", l2_offset, refcount);
-                    res->corruptions++;
-                }
-            }
-
             /* Mark L2 table as used */
             l2_offset &= L1E_OFFSET_MASK;
             inc_refcounts(bs, res, refcount_table, refcount_table_size,
@@ -1230,6 +1199,40 @@ fail:
 }
 
 /*
+ * Writes one sector of the refcount table to the disk
+ */
+#define RT_ENTRIES_PER_SECTOR (512 / sizeof(uint64_t))
+static int write_reftable_entry(BlockDriverState *bs, int rt_index)
+{
+    BDRVQcowState *s = bs->opaque;
+    uint64_t buf[RT_ENTRIES_PER_SECTOR];
+    int rt_start_index;
+    int i, ret;
+
+    rt_start_index = rt_index & ~(RT_ENTRIES_PER_SECTOR - 1);
+    for (i = 0; i < RT_ENTRIES_PER_SECTOR; i++) {
+        buf[i] = cpu_to_be64(s->refcount_table[rt_start_index + i]);
+    }
+
+    ret = qcow2_pre_write_overlap_check(bs,
+            QCOW2_OL_DEFAULT & ~QCOW2_OL_REFCOUNT_TABLE,
+            s->refcount_table_offset + rt_start_index * sizeof(uint64_t),
+            sizeof(buf));
+    if (ret < 0) {
+        return ret;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
+    ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
+            rt_start_index * sizeof(uint64_t), buf, sizeof(buf));
+    if (ret < 0) {
+        return ret;
+    }
+
+    return 0;
+}
+
+/*
  * Checks an image for refcount consistency.
  *
  * Returns 0 if no errors are found, the number of errors in case the image is
@@ -1240,7 +1243,8 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
 {
     BDRVQcowState *s = bs->opaque;
     int64_t size, i, highest_cluster;
-    int nb_clusters, refcount1, refcount2;
+    uint64_t *l2_table = NULL;
+    int nb_clusters, refcount1, refcount2, j;
     QCowSnapshot *sn;
     uint16_t *refcount_table;
     int ret;
@@ -1305,10 +1309,85 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
             inc_refcounts(bs, res, refcount_table, nb_clusters,
                 offset, s->cluster_size);
             if (refcount_table[cluster] != 1) {
-                fprintf(stderr, "ERROR refcount block %" PRId64
+                fprintf(stderr, "%s refcount block %" PRId64
                     " refcount=%d\n",
+                    fix & BDRV_FIX_ERRORS ? "Repairing" :
+                                            "ERROR",
                     i, refcount_table[cluster]);
-                res->corruptions++;
+                if (fix & BDRV_FIX_ERRORS) {
+                    int64_t new_offset;
+                    void *refcount_block;
+
+                    /* allocate new refcount block */
+                    new_offset = qcow2_alloc_clusters(bs, s->cluster_size);
+                    if (new_offset < 0) {
+                        fprintf(stderr, "Could not allocate new cluster\n");
+                        res->corruptions++;
+                        continue;
+                    }
+                    /* fetch current content */
+                    ret = qcow2_cache_get(bs, s->refcount_block_cache, offset,
+                            &refcount_block);
+                    if (ret < 0) {
+                        fprintf(stderr, "Could not fetch refcount block\n");
+                        qcow2_free_clusters(bs, new_offset, s->cluster_size,
+                                QCOW2_DISCARD_ALWAYS);
+                        res->corruptions++;
+                        continue;
+                    }
+                    /* new block has not yet been entered into refcount table,
+                     * therefore it is no refcount block yet (regarding this
+                     * check) */
+                    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+                            new_offset, s->cluster_sectors * BDRV_SECTOR_SIZE);
+                    if (ret < 0) {
+                        fprintf(stderr, "Could not write refcount block (would "
+                                "overlap with existing metadata)\n");
+                        /* the image will be marked corrupt here, so don't even
+                         * attempt on freeing the cluster */
+                        res->corruptions++;
+                        goto fail;
+                    }
+                    /* write to new block */
+                    ret = bdrv_write(bs->file, new_offset >> BDRV_SECTOR_BITS,
+                            refcount_block, s->cluster_sectors);
+                    if (ret < 0) {
+                        fprintf(stderr, "Could not write refcount block\n");
+                        qcow2_free_clusters(bs, new_offset, s->cluster_size,
+                                QCOW2_DISCARD_ALWAYS);
+                        res->corruptions++;
+                        continue;
+                    }
+                    /* update refcount table */
+                    assert(!(new_offset & (s->cluster_size - 1)));
+                    s->refcount_table[i] = new_offset;
+                    ret = write_reftable_entry(bs, i);
+                    if (ret < 0) {
+                        fprintf(stderr, "Could not update refcount table\n");
+                        s->refcount_table[i] = offset;
+                        qcow2_free_clusters(bs, new_offset, s->cluster_size,
+                                QCOW2_DISCARD_ALWAYS);
+                        res->corruptions++;
+                        continue;
+                    }
+                    qcow2_cache_put(bs, s->refcount_block_cache,
+                            &refcount_block);
+                    /* update refcounts */
+                    if ((new_offset >> s->cluster_bits) >= nb_clusters) {
+                        /* increase refcount_table size if necessary */
+                        int old_nb_clusters = nb_clusters;
+                        nb_clusters = (new_offset >> s->cluster_bits) + 1;
+                        refcount_table = g_realloc(refcount_table,
+                                nb_clusters * sizeof(uint16_t));
+                        memset(&refcount_table[old_nb_clusters], 0, (nb_clusters
+                                - old_nb_clusters) * sizeof(uint16_t));
+                    }
+                    refcount_table[cluster]--;
+                    inc_refcounts(bs, res, refcount_table, nb_clusters,
+                            new_offset, s->cluster_size);
+                } else {
+                    res->corruptions++;
+                }
             }
         }
     }
@@ -1364,10 +1443,108 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
         }
     }
 
+    l2_table = g_malloc(s->l2_size * sizeof(uint64_t));
+
+    /* check OFLAG_COPIED */
+    for (i = 0; i < s->l1_size; i++) {
+        uint64_t l1_entry = s->l1_table[i];
+        uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
+        bool l2_dirty = false;
+        int refcount;
+
+        if (!l2_offset) {
+            continue;
+        }
+
+        refcount = get_refcount(bs, l2_offset >> s->cluster_bits);
+        if (refcount < 0) {
+            /* don't print message nor increment check_errors, since the above
+             * loop will have done this already */
+            continue;
+        }
+        if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
+            fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_entry=%" PRIx64
+                    " refcount=%d\n",
+                    fix & BDRV_FIX_ERRORS ? "Repairing" :
+                                            "ERROR",
+                    l1_entry, refcount);
+            if (fix & BDRV_FIX_ERRORS) {
+                s->l1_table[i] = refcount == 1
+                               ? l1_entry |  QCOW_OFLAG_COPIED
+                               : l1_entry & ~QCOW_OFLAG_COPIED;
+                ret = qcow2_write_l1_entry(bs, i);
+                if (ret < 0) {
+                    res->check_errors++;
+                    goto fail;
+                }
+            } else {
+                res->corruptions++;
+            }
+        }
+
+        ret = bdrv_pread(bs->file, l2_offset, l2_table,
+                s->l2_size * sizeof(uint64_t));
+        if (ret != s->l2_size * sizeof(uint64_t)) {
+            fprintf(stderr, "ERROR: Could not read L2 table\n");
+            res->check_errors++;
+            if (ret >= 0) {
+                ret = -EIO;
+            }
+            goto fail;
+        }
+
+        for (j = 0; j < s->l2_size; j++) {
+            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
+            uint64_t data_offset;
+
+            if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_NORMAL) {
+                continue;
+            }
+
+            data_offset = l2_entry & L2E_OFFSET_MASK;
+
+            refcount = get_refcount(bs, data_offset >> s->cluster_bits);
+            if (refcount < 0) {
+                /* don't print message nor increment check_errors */
+                continue;
+            }
+            if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
+                fprintf(stderr, "%s OFLAG_COPIED data cluster: l2_entry=%"
+                        PRIx64 " refcount=%d\n",
+                        fix & BDRV_FIX_ERRORS ? "Repairing" :
+                                                "ERROR",
+                        l2_entry, refcount);
+                if (fix & BDRV_FIX_ERRORS) {
+                    l2_table[j] = cpu_to_be64(refcount == 1
+                                ? l2_entry |  QCOW_OFLAG_COPIED
+                                : l2_entry & ~QCOW_OFLAG_COPIED);
+                    l2_dirty = true;
+                } else {
+                    res->corruptions++;
+                }
+            }
+        }
+
+        if (l2_dirty) {
+            ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
+                    s->l2_size * sizeof(uint64_t));
+            if (ret != s->l2_size * sizeof(uint64_t)) {
+                fprintf(stderr, "ERROR: Could not write L2 table\n");
+                res->check_errors++;
+                if (ret >= 0) {
+                    ret = -EIO;
+                }
+                goto fail;
+            }
+        }
+    }
+
+
     res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
     ret = 0;
 
 fail:
+    g_free(l2_table);
     g_free(refcount_table);
 
     return ret;
diff --git a/block/qcow2.c b/block/qcow2.c
index e860834..7ed4c5d 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -312,7 +312,11 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
     }
 
     if (fix && result->check_errors == 0 && result->corruptions == 0) {
-        return qcow2_mark_clean(bs);
+        ret = qcow2_mark_clean(bs);
+        if (ret < 0) {
+            return ret;
+        }
+        return qcow2_mark_consistent(bs);
     }
     return ret;
 }
diff --git a/block/qcow2.h b/block/qcow2.h
index fa2425b..1dd9f2d 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -421,6 +421,7 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int chk, int64_t offset,
 /* qcow2-cluster.c functions */
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                         bool exact_size);
+int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
 void qcow2_l2_cache_reset(BlockDriverState *bs);
 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
 void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
diff --git a/include/block/block.h b/include/block/block.h
index 742fce5..e6b391c 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -413,6 +413,7 @@ typedef enum {
 
     BLKDBG_REFTABLE_LOAD,
     BLKDBG_REFTABLE_GROW,
+    BLKDBG_REFTABLE_UPDATE,
 
     BLKDBG_REFBLOCK_LOAD,
     BLKDBG_REFBLOCK_UPDATE,
-- 
1.8.3.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]