qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] qemu-img create: add -o nocow option


From: Chunyan Liu
Subject: [Qemu-devel] [PATCH] qemu-img create: add -o nocow option
Date: Wed, 20 Nov 2013 16:50:29 +0800

Add a 'nocow' create option so that users can set the NOCOW flag on newly
created images, which can mitigate performance issues on btrfs.

Btrfs has terrible performance when hosting VM images, even more so when the
guests in those VMs are also using btrfs as their file system. One way to
mitigate this bad performance is to turn off the COW attribute on VM image
files (since copy-on-write is not useful for this kind of data).

Signed-off-by: Chunyan Liu <address@hidden>
---

Changes made following the discussion in this thread:
    http://lists.gnu.org/archive/html/qemu-devel/2013-11/msg01743.html
    * use a -o nocow option instead of setting the NOCOW flag by default
    * use an FS_IOC_GETFLAGS/FS_IOC_SETFLAGS ioctl pair to set the flag
    * fix the READ definition conflict between headers in some files

---
 block/cow.c               |   22 ++++++++++++++++++++++
 block/qcow.c              |   22 ++++++++++++++++++++++
 block/qcow2.c             |   22 ++++++++++++++++++++++
 block/raw-posix.c         |   26 ++++++++++++++++++++++++++
 block/vdi.c               |   30 ++++++++++++++++++++++++++++++
 block/vmdk.c              |   36 ++++++++++++++++++++++++++++++++++--
 include/block/block_int.h |    1 +
 7 files changed, 157 insertions(+), 2 deletions(-)

diff --git a/block/cow.c b/block/cow.c
index 909c3e7..13268ba 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -305,6 +305,7 @@ static int cow_create(const char *filename, 
QEMUOptionParameter *options,
     Error *local_err = NULL;
     int ret;
     BlockDriverState *cow_bs;
+    int nocow = 0;
 
     /* Read out options */
     while (options && options->name) {
@@ -312,10 +313,26 @@ static int cow_create(const char *filename, 
QEMUOptionParameter *options,
             image_sectors = options->value.n / 512;
         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
             image_filename = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_NOCOW)) {
+            nocow = options->value.n ? 1 : 0;
         }
         options++;
     }
 
+    if (nocow) {
+        QEMUOptionParameter list[] = {
+            {
+                .name = BLOCK_OPT_NOCOW,
+                .type = OPT_FLAG,
+                .value.n = 1,
+                .help = "No copy-on-write",
+                .assigned = true
+            },
+            { NULL }
+        };
+        options = list;
+    }
+
     ret = bdrv_create_file(filename, options, &local_err);
     if (ret < 0) {
         qerror_report_err(local_err);
@@ -375,6 +392,11 @@ static QEMUOptionParameter cow_create_options[] = {
         .type = OPT_STRING,
         .help = "File name of a base image"
     },
+    {
+        .name = BLOCK_OPT_NOCOW,
+        .type = OPT_FLAG,
+        .help = "No copy-on-write"
+    },
     { NULL }
 };
 
diff --git a/block/qcow.c b/block/qcow.c
index c470e05..563ed70 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -671,6 +671,7 @@ static int qcow_create(const char *filename, 
QEMUOptionParameter *options,
     Error *local_err = NULL;
     int ret;
     BlockDriverState *qcow_bs;
+    int nocow = 0;
 
     /* Read out options */
     while (options && options->name) {
@@ -680,10 +681,26 @@ static int qcow_create(const char *filename, 
QEMUOptionParameter *options,
             backing_file = options->value.s;
         } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
             flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
+        } else if (!strcmp(options->name, BLOCK_OPT_NOCOW)) {
+            nocow = options->value.n ? 1 : 0;
         }
         options++;
     }
 
+    if (nocow) {
+        QEMUOptionParameter list[] = {
+            {
+                .name = BLOCK_OPT_NOCOW,
+                .type = OPT_FLAG,
+                .value.n = 1,
+                .help = "No copy-on-write",
+                .assigned = true
+            },
+            { NULL }
+        };
+        options = list;
+    }
+
     ret = bdrv_create_file(filename, options, &local_err);
     if (ret < 0) {
         qerror_report_err(local_err);
@@ -895,6 +912,11 @@ static QEMUOptionParameter qcow_create_options[] = {
         .type = OPT_FLAG,
         .help = "Encrypt the image"
     },
+    {
+        .name = BLOCK_OPT_NOCOW,
+        .type = OPT_FLAG,
+        .help = "No copy-on-write"
+    },
     { NULL }
 };
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 6e5d98d..f7ff6c3 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1612,6 +1612,7 @@ static int qcow2_create(const char *filename, 
QEMUOptionParameter *options,
     int version = 3;
     Error *local_err = NULL;
     int ret;
+    int nocow = 0;
 
     /* Read out options */
     while (options && options->name) {
@@ -1651,6 +1652,8 @@ static int qcow2_create(const char *filename, 
QEMUOptionParameter *options,
             }
         } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
             flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
+        } else if (!strcmp(options->name, BLOCK_OPT_NOCOW)) {
+            nocow = options->value.n ? 1 : 0;
         }
         options++;
     }
@@ -1667,6 +1670,20 @@ static int qcow2_create(const char *filename, 
QEMUOptionParameter *options,
         return -EINVAL;
     }
 
+    if (nocow) {
+        QEMUOptionParameter list[] = {
+            {
+                .name = BLOCK_OPT_NOCOW,
+                .type = OPT_FLAG,
+                .value.n = 1,
+                .help = "No copy-on-write",
+                .assigned = true
+            },
+            { NULL }
+        };
+        options = list;
+    }
+
     ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
                         cluster_size, prealloc, options, version, &local_err);
     if (error_is_set(&local_err)) {
@@ -2222,6 +2239,11 @@ static QEMUOptionParameter qcow2_create_options[] = {
         .type = OPT_FLAG,
         .help = "Postpone refcount updates",
     },
+    {
+        .name = BLOCK_OPT_NOCOW,
+        .type = OPT_FLAG,
+        .help = "No copy-on-write"
+    },
     { NULL }
 };
 
diff --git a/block/raw-posix.c b/block/raw-posix.c
index f6d48bb..392e3e0 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -55,6 +55,9 @@
 #include <linux/cdrom.h>
 #include <linux/fd.h>
 #include <linux/fs.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
 #endif
 #ifdef CONFIG_FIEMAP
 #include <linux/fiemap.h>
@@ -1057,11 +1060,14 @@ static int raw_create(const char *filename, 
QEMUOptionParameter *options,
     int fd;
     int result = 0;
     int64_t total_size = 0;
+    int nocow = 0;
 
     /* Read out options */
     while (options && options->name) {
         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
             total_size = options->value.n / BDRV_SECTOR_SIZE;
+        } else if (!strcmp(options->name, BLOCK_OPT_NOCOW)) {
+            nocow = options->value.n ? 1 : 0;
         }
         options++;
     }
@@ -1072,6 +1078,21 @@ static int raw_create(const char *filename, 
QEMUOptionParameter *options,
         result = -errno;
         error_setg_errno(errp, -result, "Could not create file");
     } else {
+        if (nocow) {
+#ifdef __linux__
+            /* Set NOCOW flag to solve performance issue on fs like btrfs.
+             * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value 
will
+             * be ignored since any failure of this operation should not block 
the
+             * left work.
+             */
+            int attr;
+            if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+                attr |= FS_NOCOW_FL;
+                ioctl(fd, FS_IOC_SETFLAGS, &attr);
+            }
+#endif
+        }
+
         if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
             result = -errno;
             error_setg_errno(errp, -result, "Could not resize file");
@@ -1206,6 +1227,11 @@ static QEMUOptionParameter raw_create_options[] = {
         .type = OPT_SIZE,
         .help = "Virtual disk size"
     },
+    {
+        .name = BLOCK_OPT_NOCOW,
+        .type = OPT_FLAG,
+        .help = "No copy-on-write"
+    },
     { NULL }
 };
 
diff --git a/block/vdi.c b/block/vdi.c
index b6ec002..a569b19 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,6 +53,13 @@
 #include "block/block_int.h"
 #include "qemu/module.h"
 #include "migration/migration.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
+#endif
 
 #if defined(CONFIG_UUID)
 #include <uuid/uuid.h>
@@ -657,6 +664,7 @@ static int vdi_create(const char *filename, 
QEMUOptionParameter *options,
     VdiHeader header;
     size_t i;
     size_t bmap_size;
+    int nocow = 0;
 
     logout("\n");
 
@@ -677,6 +685,8 @@ static int vdi_create(const char *filename, 
QEMUOptionParameter *options,
                 image_type = VDI_TYPE_STATIC;
             }
 #endif
+        } else if (!strcmp(options->name, BLOCK_OPT_NOCOW)) {
+            nocow = options->value.n ? 1 : 0;
         }
         options++;
     }
@@ -688,6 +698,21 @@ static int vdi_create(const char *filename, 
QEMUOptionParameter *options,
         return -errno;
     }
 
+    if (nocow) {
+#ifdef __linux__
+        /* Set NOCOW flag to solve performance issue on fs like btrfs.
+         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
+         * be ignored since any failure of this operation should not block the
+         * left work.
+         */
+        int attr;
+        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+            attr |= FS_NOCOW_FL;
+            ioctl(fd, FS_IOC_SETFLAGS, &attr);
+        }
+#endif
+    }
+
     /* We need enough blocks to store the given disk size,
        so always round up. */
     blocks = (bytes + block_size - 1) / block_size;
@@ -780,6 +805,11 @@ static QEMUOptionParameter vdi_create_options[] = {
         .help = "VDI static (pre-allocated) image"
     },
 #endif
+    {
+        .name = BLOCK_OPT_NOCOW,
+        .type = OPT_FLAG,
+        .help = "No copy-on-write"
+    },
     /* TODO: An additional option to set UUID values might be useful. */
     { NULL }
 };
diff --git a/block/vmdk.c b/block/vmdk.c
index a7ebd0f..d662dec 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -28,6 +28,13 @@
 #include "qemu/module.h"
 #include "migration/migration.h"
 #include <zlib.h>
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
+#endif
+#endif
 
 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
@@ -1435,7 +1442,8 @@ static int coroutine_fn 
vmdk_co_write_zeroes(BlockDriverState *bs,
 }
 
 static int vmdk_create_extent(const char *filename, int64_t filesize,
-                              bool flat, bool compress, bool zeroed_grain)
+                              bool flat, bool compress, bool zeroed_grain,
+                              int nocow)
 {
     int ret, i;
     int fd = 0;
@@ -1447,7 +1455,23 @@ static int vmdk_create_extent(const char *filename, 
int64_t filesize,
                    0644);
     if (fd < 0) {
         return -errno;
+    } 
+    
+    if (nocow) {
+#ifdef __linux__
+        /* Set NOCOW flag to solve performance issue on fs like btrfs.
+         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
+         * be ignored since any failure of this operation should not block the
+         * left work.
+         */
+        int attr;
+        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+            attr |= FS_NOCOW_FL;
+            ioctl(fd, FS_IOC_SETFLAGS, &attr);
+        }
+#endif
     }
+
     if (flat) {
         ret = ftruncate(fd, filesize);
         if (ret < 0) {
@@ -1604,6 +1628,7 @@ static int vmdk_create(const char *filename, 
QEMUOptionParameter *options,
     uint32_t parent_cid = 0xffffffff;
     uint32_t number_heads = 16;
     bool zeroed_grain = false;
+    int nocow = 0;
     const char desc_template[] =
         "# Disk DescriptorFile\n"
         "version=1\n"
@@ -1641,6 +1666,8 @@ static int vmdk_create(const char *filename, 
QEMUOptionParameter *options,
             fmt = options->value.s;
         } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) {
             zeroed_grain |= options->value.n;
+        } else if (!strcmp(options->name, BLOCK_OPT_NOCOW)) {
+            nocow = options->value.n ? 1 : 0;
         }
         options++;
     }
@@ -1729,7 +1756,7 @@ static int vmdk_create(const char *filename, 
QEMUOptionParameter *options,
                 path, desc_filename);
 
         if (vmdk_create_extent(ext_filename, size,
-                               flat, compress, zeroed_grain)) {
+                               flat, compress, zeroed_grain, nocow)) {
             return -EINVAL;
         }
         filesize -= size;
@@ -1926,6 +1953,11 @@ static QEMUOptionParameter vmdk_create_options[] = {
         .type = OPT_FLAG,
         .help = "Enable efficient zero writes using the zeroed-grain GTE 
feature"
     },
+    {
+        .name = BLOCK_OPT_NOCOW,
+        .type = OPT_FLAG,
+        .help = "No copy-on-write"
+    },
     { NULL }
 };
 
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 1666066..f03568d 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -53,6 +53,7 @@
 #define BLOCK_OPT_COMPAT_LEVEL      "compat"
 #define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
 #define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
+#define BLOCK_OPT_NOCOW             "nocow"
 
 typedef struct BdrvTrackedRequest {
     BlockDriverState *bs;
-- 
1.6.0.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]