qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel][PATCH,RFC] Zero cluster dedup - qemu-img extentions


From: Shahar Frank
Subject: Re: [Qemu-devel][PATCH,RFC] Zero cluster dedup - qemu-img extentions
Date: Wed, 3 Sep 2008 06:13:46 -0700

The qemu-img extensions were implemented to help me debug the zero cluster
optimization. However, they seem to be pretty generic and also useful
for many other things.

The new verbs are:
        check - perform an internal validity check (implemented only for qcow2)
        map - map logical to physical clusters (implemented only for qcow2)

Existing verb extensions:
        info -r - do a recursive info (also report on the backing images)

Signed-off-by: Shahar Frank <address@hidden>

Shahar

diff --git a/block-qcow2.c b/block-qcow2.c
index ca3faf1..f9a786b 100644
--- a/block-qcow2.c
+++ b/block-qcow2.c
@@ -2522,18 +2522,17 @@ static void update_refcount(BlockDriverState
*bs,
     }
 }
 
-#ifdef DEBUG_ALLOC
-static void inc_refcounts(BlockDriverState *bs,
+static int inc_refcounts(BlockDriverState *bs,
                           uint16_t *refcount_table,
                           int refcount_table_size,
                           int64_t offset, int64_t size)
 {
     BDRVQcowState *s = bs->opaque;
     int64_t start, last, cluster_offset;
-    int k;
+    int k, err = 0;
 
     if (size <= 0)
-        return;
+        return 1;
 
     start = offset & ~(s->cluster_size - 1);
     last = (offset + size - 1) & ~(s->cluster_size - 1);
@@ -2541,13 +2540,16 @@ static void inc_refcounts(BlockDriverState *bs,
         cluster_offset += s->cluster_size) {
         k = cluster_offset >> s->cluster_bits;
         if (k < 0 || k >= refcount_table_size) {
-            printf("ERROR: invalid cluster offset=0x%llx\n",
cluster_offset);
+            printf("ERROR: invalid cluster offset=0x%" PRIx64 "\n",
cluster_offset);
+            err++;
         } else {
             if (++refcount_table[k] == 0) {
-                printf("ERROR: overflow cluster offset=0x%llx\n",
cluster_offset);
+                printf("ERROR: overflow cluster offset=0x%" PRIx64
"\n", cluster_offset);
+                err++;
             }
         }
     }
+    return err;
 }
 
 static int check_refcounts_l1(BlockDriverState *bs,
@@ -2559,11 +2561,12 @@ static int check_refcounts_l1(BlockDriverState
*bs,
     BDRVQcowState *s = bs->opaque;
     uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
     int l2_size, i, j, nb_csectors, refcount;
+    int err = 0;
 
     l2_table = NULL;
     l1_size2 = l1_size * sizeof(uint64_t);
 
-    inc_refcounts(bs, refcount_table, refcount_table_size,
+    err += inc_refcounts(bs, refcount_table, refcount_table_size,
                   l1_table_offset, l1_size2);
 
     l1_table = qemu_malloc(l1_size2);
@@ -2585,8 +2588,9 @@ static int check_refcounts_l1(BlockDriverState
*bs,
             if (check_copied) {
                 refcount = get_refcount(bs, (l2_offset &
~QCOW_OFLAG_COPIED) >> s->cluster_bits);
                 if ((refcount == 1) != ((l2_offset & QCOW_OFLAG_COPIED)
!= 0)) {
-                    printf("ERROR OFLAG_COPIED: l2_offset=%llx
refcount=%d\n",
-                           l2_offset, refcount);
+                    printf("ERROR OFLAG_COPIED: L1 entry i %d L2 block
l2_offset=%" PRIx64 " refcount=%d\n",
+                           i, l2_offset, refcount);
+                    err++;
                 }
             }
             l2_offset &= ~QCOW_OFLAG_COPIED;
@@ -2597,32 +2601,34 @@ static int check_refcounts_l1(BlockDriverState
*bs,
                 if (offset != 0) {
                     if (offset & QCOW_OFLAG_COMPRESSED) {
                         if (offset & QCOW_OFLAG_COPIED) {
-                            printf("ERROR: cluster %lld: copied flag
must never be set for compressed clusters\n",
+                            printf("ERROR: cluster %ld: copied flag
must never be set for compressed clusters\n",
                                    offset >> s->cluster_bits);
+                            err++;
                             offset &= ~QCOW_OFLAG_COPIED;
                         }
                         nb_csectors = ((offset >> s->csize_shift) &
                                        s->csize_mask) + 1;
                         offset &= s->cluster_offset_mask;
-                        inc_refcounts(bs, refcount_table,
+                        err += inc_refcounts(bs, refcount_table,
                                       refcount_table_size,
                                       offset & ~511, nb_csectors *
512);
                     } else {
                         if (check_copied) {
                             refcount = get_refcount(bs, (offset &
~QCOW_OFLAG_COPIED) >> s->cluster_bits);
                             if ((refcount == 1) != ((offset &
QCOW_OFLAG_COPIED) != 0)) {
-                                printf("ERROR OFLAG_COPIED: offset=%llx
refcount=%d\n",
-                                       offset, refcount);
+                                printf("WARN OFLAG_COPIED: L1 %d L2 %"
PRIx64 " data j %d offset=%" PRIx64 " refcount=%d\n",
+                                       i, l2_offset, j, offset,
refcount);
+                                /*err++;   may be valid if it is the
zero cluster from this or from previous runs */
                             }
                         }
                         offset &= ~QCOW_OFLAG_COPIED;
-                        inc_refcounts(bs, refcount_table,
+                        err += inc_refcounts(bs, refcount_table,
                                       refcount_table_size,
                                       offset, s->cluster_size);
                     }
                 }
             }
-            inc_refcounts(bs, refcount_table,
+            err += inc_refcounts(bs, refcount_table,
                           refcount_table_size,
                           l2_offset,
                           s->cluster_size);
@@ -2630,7 +2636,7 @@ static int check_refcounts_l1(BlockDriverState
*bs,
     }
     qemu_free(l1_table);
     qemu_free(l2_table);
-    return 0;
+    return err;
  fail:
     printf("ERROR: I/O error in check_refcounts_l1\n");
     qemu_free(l1_table);
@@ -2638,43 +2644,45 @@ static int check_refcounts_l1(BlockDriverState
*bs,
     return -EIO;
 }
 
-static void check_refcounts(BlockDriverState *bs)
+static int check_refcounts(BlockDriverState *bs)
 {
     BDRVQcowState *s = bs->opaque;
     int64_t size;
     int nb_clusters, refcount1, refcount2, i;
     QCowSnapshot *sn;
     uint16_t *refcount_table;
+    int err = 0;
 
     size = bdrv_getlength(s->hd);
     nb_clusters = (size + s->cluster_size - 1) >> s->cluster_bits;
     refcount_table = qemu_mallocz(nb_clusters * sizeof(uint16_t));
 
     /* header */
-    inc_refcounts(bs, refcount_table, nb_clusters,
+    err += inc_refcounts(bs, refcount_table, nb_clusters,
                   0, s->cluster_size);
 
-    check_refcounts_l1(bs, refcount_table, nb_clusters,
+    err += check_refcounts_l1(bs, refcount_table, nb_clusters,
                        s->l1_table_offset, s->l1_size, 1);
 
     /* snapshots */
     for(i = 0; i < s->nb_snapshots; i++) {
         sn = s->snapshots + i;
-        check_refcounts_l1(bs, refcount_table, nb_clusters,
+        err += check_refcounts_l1(bs, refcount_table, nb_clusters,
                            sn->l1_table_offset, sn->l1_size, 0);
     }
-    inc_refcounts(bs, refcount_table, nb_clusters,
-                  s->snapshots_offset, s->snapshots_size);
+    if (s->nb_snapshots)
+        err += inc_refcounts(bs, refcount_table, nb_clusters,
+                             s->snapshots_offset, s->snapshots_size);
 
     /* refcount data */
-    inc_refcounts(bs, refcount_table, nb_clusters,
+    err += inc_refcounts(bs, refcount_table, nb_clusters,
                   s->refcount_table_offset,
                   s->refcount_table_size * sizeof(uint64_t));
     for(i = 0; i < s->refcount_table_size; i++) {
         int64_t offset;
         offset = s->refcount_table[i];
         if (offset != 0) {
-            inc_refcounts(bs, refcount_table, nb_clusters,
+            err += inc_refcounts(bs, refcount_table, nb_clusters,
                           offset, s->cluster_size);
         }
     }
@@ -2683,12 +2691,15 @@ static void check_refcounts(BlockDriverState
*bs)
     for(i = 0; i < nb_clusters; i++) {
         refcount1 = get_refcount(bs, i);
         refcount2 = refcount_table[i];
-        if (refcount1 != refcount2)
+        if (refcount1 != refcount2) {
             printf("ERROR cluster %d refcount=%d reference=%d\n",
                    i, refcount1, refcount2);
+            err++;
+        }
     }
 
     qemu_free(refcount_table);
+    return err;
 }
 
 #if 0
@@ -2710,7 +2721,37 @@ static void dump_refcounts(BlockDriverState *bs)
     }
 }
 #endif
-#endif
+
+
+int64_t qcow_map(BlockDriverState *bs, int64_t offset, int local,
BlockDriverState **dst)
+{
+    uint64_t cluster_offset;
+    int n;
+
+    if (dst)
+        *dst = bs;
+
+    //fprintf(stderr, "bs %p offset %llx local %d dst %p\n", bs,
offset, local, dst);
+    n = 1;
+    cluster_offset = get_cluster_offset(bs, offset, &n);
+    if (cluster_offset || local)
+        return cluster_offset;
+
+    do {
+        if (!(bs = bs->backing_hd))
+            return 0;   /* unmapped */
+
+        //fprintf(stderr, ">> bs %p offset %llx local %d dst %p\n", bs,
offset, local, dst);
+        if (!bs->drv)
+            return -ENOENT; /* no media */
+
+        if (!bs->drv->bdrv_map)
+            return -ENOTSUP;
+
+        cluster_offset = bs->drv->bdrv_map(bs, offset, 0, dst);
+    } while (!cluster_offset);
+    return cluster_offset & ~QCOW_OFLAG_COMPRESSED;
+}
 
 BlockDriver bdrv_qcow2 = {
     "qcow2",
@@ -2737,4 +2778,6 @@ BlockDriver bdrv_qcow2 = {
     .bdrv_snapshot_delete = qcow_snapshot_delete,
     .bdrv_snapshot_list = qcow_snapshot_list,
     .bdrv_get_info = qcow_get_info,
+    .bdrv_map = qcow_map,
+    .bdrv_check = check_refcounts,
 };
diff --git a/block_int.h b/block_int.h
index 137000e..673c134 100644
--- a/block_int.h
+++ b/block_int.h
@@ -85,6 +85,9 @@ struct BlockDriver {
     /* to control generic scsi devices */
     int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void
*buf);
 
+    /* misc */
+    int64_t (*bdrv_map)(BlockDriverState *bs, int64_t offset, int
local, BlockDriverState **dst);
+    int (*bdrv_check)(BlockDriverState *bs);
     BlockDriverAIOCB *free_aiocb;
     struct BlockDriver *next;
 };
diff --git a/qemu-img.c b/qemu-img.c
index 70c2403..ff5670b 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -53,10 +53,12 @@ static void help(void)
            "QEMU disk image utility\n"
            "\n"
            "Command syntax:\n"
+           "  check [ -f ] image\n"
            "  create [-e] [-6] [-b base_image] [-f fmt] filename
[size]\n"
            "  commit [-f fmt] filename\n"
            "  convert [-c] [-e] [-6] [-f fmt] [-O output_fmt] [-B
output_base_image] filename [filename2 [...]] output_filename\n"
-           "  info [-f fmt] filename\n"
+           "  info [-f fmt] [ -r ]filename\n"
+           "  map [ -f fmt] [ -r ] filename\n"
            "\n"
            "Command parameters:\n"
            "  'filename' is a disk image filename\n"
@@ -74,6 +76,7 @@ static void help(void)
            "  '-c' indicates that target image must be compressed (qcow
format only)\n"
            "  '-e' indicates that the target image must be encrypted
(qcow format only)\n"
            "  '-6' indicates that the target image must use
compatibility level 6 (vmdk format only)\n"
+           "  '-r' indicated that the base images are recursed in (for
info and map)\n"
            );
     printf("\nSupported format:");
     bdrv_iterate_format(format_print, NULL);
@@ -343,6 +346,152 @@ static int img_commit(int argc, char **argv)
     return 0;
 }
 
+#define IO_BUF_SIZE 65536
+
+static int img_map(int argc, char **argv)
+{
+    int c, ret = 0, cluster_size, cluster_sectors;
+    const char *filename, *fmt;
+    BlockDriver *drv;
+    BlockDriverState *bs, *store;
+    int64_t offset, poffset, e;
+    uint64_t bs_sectors;
+    BlockDriverInfo bdi;
+    int local = 1;
+
+    fmt = NULL;
+    for(;;) {
+        c = getopt(argc, argv, "f:hr");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        case 'r':
+            local = 0;
+            break;
+        }
+    }
+    if (optind >= argc)
+        help();
+    filename = argv[optind++];
+
+    bs = bdrv_new("");
+    if (!bs)
+        error("Not enough memory");
+    if (fmt) {
+        drv = bdrv_find_format(fmt);
+        if (!drv)
+            error("Unknown file format '%s'", fmt);
+    } else {
+        drv = NULL;
+    }
+    if (bdrv_open2(bs, filename, 0, drv) < 0) {
+        error("Could not open '%s'", filename);
+    }
+    bdrv_get_geometry(bs, &bs_sectors);
+
+    if (bdrv_get_info(bs, &bdi) < 0)
+        error("could not get block driver info");
+    cluster_size = bdi.cluster_size;
+    if (cluster_size <= 0 || cluster_size > IO_BUF_SIZE)
+            error("invalid cluster size");
+    cluster_sectors = cluster_size >> 9;
+    e = bs_sectors << 9;
+    for (offset = 0; offset < e; offset += cluster_size) {
+        if ((poffset = bs->drv->bdrv_map(bs, offset, local, &store)))
+            printf("%s %016llx %016llx\n", store->filename, (long long
unsigned int)offset, (long long unsigned int)poffset);
+        if (poffset < 0) {
+            ret = poffset;
+            break;
+        }
+    }
+
+    switch(ret) {
+    case 0:
+        printf("\n");
+        break;
+    case -ENOENT:
+        error("No disk inserted");
+        break;
+    case -ENOTSUP:
+        error("Image is already committed");
+        break;
+    default:
+        error("Error during map (%d)", ret);
+        break;
+    }
+
+    bdrv_delete(bs);
+    return 0;
+}
+
+static int img_check(int argc, char **argv)
+{
+    int c, ret = 0, cluster_size, cluster_sectors;
+    const char *filename, *fmt;
+    BlockDriver *drv;
+    BlockDriverState *bs;
+    int64_t e;
+    uint64_t bs_sectors;
+    BlockDriverInfo bdi;
+
+    fmt = NULL;
+    for(;;) {
+        c = getopt(argc, argv, "f:h");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            help();
+            break;
+        case 'f':
+            fmt = optarg;
+            break;
+        }
+    }
+    if (optind >= argc)
+        help();
+    filename = argv[optind++];
+
+    bs = bdrv_new("");
+    if (!bs)
+        error("Not enough memory");
+    if (fmt) {
+        drv = bdrv_find_format(fmt);
+        if (!drv)
+            error("Unknown file format '%s'", fmt);
+    } else {
+        drv = NULL;
+    }
+    if (bdrv_open2(bs, filename, 0, drv) < 0) {
+        error("Could not open '%s'", filename);
+    }
+    bdrv_get_geometry(bs, &bs_sectors);
+
+    if (bdrv_get_info(bs, &bdi) < 0)
+        error("could not get block driver info");
+    cluster_size = bdi.cluster_size;
+    if (cluster_size <= 0 || cluster_size > IO_BUF_SIZE)
+            error("invalid cluster size");
+    cluster_sectors = cluster_size >> 9;
+    e = bs_sectors << 9;
+    printf("%s: size %" PRId64 " bytes, cluster size %d bytes: ",
bs->filename, e, cluster_size);
+    if (bs->drv && bs->drv->bdrv_check) {
+        ret = bs->drv->bdrv_check(bs);
+        printf("check %s\n", ret ? "FAILED" : "PASSED");
+    } else
+        error("Check not supported");
+
+    bdrv_delete(bs);
+    return ret;
+}
+
 static int is_not_zero(const uint8_t *sector, int len)
 {
     int i;
@@ -379,7 +528,6 @@ static int is_allocated_sectors(const uint8_t *buf,
int n, int *pnum)
     return v;
 }
 
-#define IO_BUF_SIZE 65536
 
 static int img_convert(int argc, char **argv)
 {
@@ -650,22 +798,68 @@ static void dump_snapshots(BlockDriverState *bs)
     qemu_free(sn_tab);
 }
 
-static int img_info(int argc, char **argv)
+static int _info(BlockDriverState *bs, int recursive)
 {
-    int c;
-    const char *filename, *fmt;
-    BlockDriver *drv;
-    BlockDriverState *bs;
-    char fmt_name[128], size_buf[128], dsize_buf[128];
+    char fmt_name[128], size_buf[128], dsize_buf[128], indent[128] =
"";
     uint64_t total_sectors;
     int64_t allocated_size;
     char backing_filename[1024];
     char backing_filename2[1024];
     BlockDriverInfo bdi;
 
+    bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
+    bdrv_get_geometry(bs, &total_sectors);
+    get_human_readable_size(size_buf, sizeof(size_buf), total_sectors *
512);
+    allocated_size = get_allocated_file_size(bs->filename);
+    if (allocated_size < 0)
+        sprintf(dsize_buf, "unavailable");
+    else
+        get_human_readable_size(dsize_buf, sizeof(dsize_buf),
+                                allocated_size);
+    if (recursive) {
+        memset(indent, ' ', (recursive-1)*2);
+        printf("%slevel: %d\n", indent, recursive - 1);
+    }
+    printf("%simage: %s\n"
+           "%sfile format: %s\n"
+           "%svirtual size: %s (%" PRId64 " bytes)\n"
+           "%sdisk size: %s\n",
+           indent, bs->filename,
+           indent, fmt_name,
+           indent, size_buf, (total_sectors * 512),
+           indent, dsize_buf);
+    if (bdrv_is_encrypted(bs))
+        printf("%sencrypted: yes\n", indent);
+    if (bdrv_get_info(bs, &bdi) >= 0) {
+        if (bdi.cluster_size != 0)
+            printf("%scluster_size: %d\n", indent, bdi.cluster_size);
+    }
+    bdrv_get_backing_filename(bs, backing_filename,
sizeof(backing_filename));
+    if (backing_filename[0] != '\0') {
+        path_combine(backing_filename2, sizeof(backing_filename2),
+                     bs->filename, backing_filename);
+        printf("%sbacking file: %s (actual path: %s)\n",
+               indent,
+               backing_filename,
+               backing_filename2);
+    }
+    if (recursive <= 1)
+        dump_snapshots(bs);
+    if (recursive && bs->backing_hd)
+        _info(bs->backing_hd, recursive+1);
+    return 0;
+}
+
+static int img_info(int argc, char **argv)
+{
+    int c, recursive = 0;
+    const char *filename, *fmt;
+    BlockDriver *drv;
+    BlockDriverState *bs;
+
     fmt = NULL;
     for(;;) {
-        c = getopt(argc, argv, "f:h");
+        c = getopt(argc, argv, "f:hr");
         if (c == -1)
             break;
         switch(c) {
@@ -675,6 +869,9 @@ static int img_info(int argc, char **argv)
         case 'f':
             fmt = optarg;
             break;
+        case 'r':
+            recursive = 1;
+            break;
         }
     }
     if (optind >= argc)
@@ -694,37 +891,7 @@ static int img_info(int argc, char **argv)
     if (bdrv_open2(bs, filename, 0, drv) < 0) {
         error("Could not open '%s'", filename);
     }
-    bdrv_get_format(bs, fmt_name, sizeof(fmt_name));
-    bdrv_get_geometry(bs, &total_sectors);
-    get_human_readable_size(size_buf, sizeof(size_buf), total_sectors *
512);
-    allocated_size = get_allocated_file_size(filename);
-    if (allocated_size < 0)
-        snprintf(dsize_buf, sizeof(dsize_buf), "unavailable");
-    else
-        get_human_readable_size(dsize_buf, sizeof(dsize_buf),
-                                allocated_size);
-    printf("image: %s\n"
-           "file format: %s\n"
-           "virtual size: %s (%" PRId64 " bytes)\n"
-           "disk size: %s\n",
-           filename, fmt_name, size_buf,
-           (total_sectors * 512),
-           dsize_buf);
-    if (bdrv_is_encrypted(bs))
-        printf("encrypted: yes\n");
-    if (bdrv_get_info(bs, &bdi) >= 0) {
-        if (bdi.cluster_size != 0)
-            printf("cluster_size: %d\n", bdi.cluster_size);
-    }
-    bdrv_get_backing_filename(bs, backing_filename,
sizeof(backing_filename));
-    if (backing_filename[0] != '\0') {
-        path_combine(backing_filename2, sizeof(backing_filename2),
-                     filename, backing_filename);
-        printf("backing file: %s (actual path: %s)\n",
-               backing_filename,
-               backing_filename2);
-    }
-    dump_snapshots(bs);
+    _info(bs, recursive);
     bdrv_delete(bs);
     return 0;
 }
@@ -732,6 +899,7 @@ static int img_info(int argc, char **argv)
 int main(int argc, char **argv)
 {
     const char *cmd;
+    int ret = 0;
 
     bdrv_init();
     if (argc < 2)
@@ -746,8 +914,12 @@ int main(int argc, char **argv)
         img_convert(argc, argv);
     } else if (!strcmp(cmd, "info")) {
         img_info(argc, argv);
+    } else if (!strcmp(cmd, "map")) {
+        img_map(argc, argv);
+    } else if (!strcmp(cmd, "check")) {
+        ret = img_check(argc, argv);
     } else {
         help();
     }
-    return 0;
+    return ret;
 }

Attachment: qemu-img-zopt.patch
Description: qemu-img-zopt.patch


reply via email to

[Prev in Thread] Current Thread [Next in Thread]