qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [5485] Expand cache= option and use write-through caching by default


From: C . W . Betts
Subject: Re: [Qemu-devel] [5485] Expand cache= option and use write-through caching by default
Date: Tue, 14 Oct 2008 10:19:01 -0600

Mac OS X doesn't seem to have O_DSYNC in it:
gcc-4.2 -O2 -g -fno-strict-aliasing -Wall -Wundef -Wendif-labels -Wwrite-strings -mdynamic-no-pic -m32 -I. -I/Users/cwbetts/makestuff/qemu-allmac/src -MMD -MP -MT block-raw-posix.o -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -I/Users/cwbetts/makestuff/qemu-allmac/src/slirp -c -o block-raw-posix.o /Users/cwbetts/makestuff/qemu-allmac/src/block-raw-posix.c
/Users/cwbetts/makestuff/qemu-allmac/src/block-raw-posix.c: In function ‘raw_open’:
/Users/cwbetts/makestuff/qemu-allmac/src/block-raw-posix.c:139: error: ‘O_DSYNC’ undeclared (first use in this function)
/Users/cwbetts/makestuff/qemu-allmac/src/block-raw-posix.c:139: error: (Each undeclared identifier is reported only once
/Users/cwbetts/makestuff/qemu-allmac/src/block-raw-posix.c:139: error: for each function it appears in.)
/Users/cwbetts/makestuff/qemu-allmac/src/block-raw-posix.c: In function ‘hdev_open’:
/Users/cwbetts/makestuff/qemu-allmac/src/block-raw-posix.c:1000: error: ‘O_DSYNC’ undeclared (first use in this function)
On Oct 14, 2008, at 8:42 AM, Anthony Liguori wrote:

Revision: 5485
         http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=5485
Author:   aliguori
Date:     2008-10-14 14:42:54 +0000 (Tue, 14 Oct 2008)

Log Message:
-----------
Expand cache= option and use write-through caching by default

This patch changes the cache= option to accept none, writeback, or writethrough
to control the host page cache behavior.  By default, writethrough caching is
now used which internally is implemented by using O_DSYNC to open the disk
images.  When using -snapshot, writeback is used by default since data integrity
is not at all an issue.

cache=none has the same behavior as cache=off previously.  The latter syntax is
still supported but now deprecated.  I also cleaned up the O_DIRECT
implementation to avoid many of the #ifdefs.

Signed-off-by: Anthony Liguori <address@hidden>

Modified Paths:
--------------
   trunk/block-raw-posix.c
   trunk/block-raw-win32.c
   trunk/block.c
   trunk/block.h
   trunk/qemu-doc.texi
   trunk/qemu-nbd.c
   trunk/vl.c

Modified: trunk/block-raw-posix.c
===================================================================
--- trunk/block-raw-posix.c 2008-10-13 21:08:55 UTC (rev 5484)
+++ trunk/block-raw-posix.c 2008-10-14 14:42:54 UTC (rev 5485)
@@ -73,6 +73,11 @@
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
#endif

+/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
+#ifndef O_DIRECT
+#define O_DIRECT O_DSYNC
+#endif
+
#define FTYPE_FILE   0
#define FTYPE_CD     1
#define FTYPE_FD     2
@@ -101,9 +106,7 @@
    int fd_got_error;
    int fd_media_changed;
#endif
-#if defined(O_DIRECT)
    uint8_t* aligned_buf;
-#endif
} BDRVRawState;

static int posix_aio_init(void);
@@ -129,10 +132,13 @@
    }
    if (flags & BDRV_O_CREAT)
        open_flags |= O_CREAT | O_TRUNC;
-#ifdef O_DIRECT
-    if (flags & BDRV_O_DIRECT)
+
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if ((flags & BDRV_O_NOCACHE))
        open_flags |= O_DIRECT;
-#endif
+    else if (!(flags & BDRV_O_CACHE_WB))
+        open_flags |= O_DSYNC;

    s->type = FTYPE_FILE;

@@ -146,9 +152,8 @@
    s->fd = fd;
    for (i = 0; i < RAW_FD_POOL_SIZE; i++)
        s->fd_pool[i] = -1;
-#if defined(O_DIRECT)
    s->aligned_buf = NULL;
-    if (flags & BDRV_O_DIRECT) {
+    if ((flags & BDRV_O_NOCACHE)) {
        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
        if (s->aligned_buf == NULL) {
            ret = -errno;
@@ -156,7 +161,6 @@
            return ret;
        }
    }
-#endif
    return 0;
}

@@ -281,7 +285,6 @@
}


-#if defined(O_DIRECT)
/*
 * offset and count are in bytes and possibly not aligned. For files opened
 * with O_DIRECT, necessary alignments are ensured before calling
@@ -432,12 +435,6 @@
    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
}

-#else
-#define raw_pread raw_pread_aligned
-#define raw_pwrite raw_pwrite_aligned
-#endif
-
-
#ifdef CONFIG_AIO
/***********************************************************/
/* Unix AIO using POSIX AIO */
@@ -661,7 +658,6 @@
     * If O_DIRECT is used and the buffer is not aligned fall back
     * to synchronous IO.
     */
-#if defined(O_DIRECT)
    BDRVRawState *s = bs->opaque;

    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
@@ -672,7 +668,6 @@
        qemu_bh_schedule(bh);
        return &acb->common;
    }
-#endif

    acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
    if (!acb)
@@ -694,7 +689,6 @@
     * If O_DIRECT is used and the buffer is not aligned fall back
     * to synchronous IO.
     */
-#if defined(O_DIRECT)
    BDRVRawState *s = bs->opaque;

    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
@@ -705,7 +699,6 @@
        qemu_bh_schedule(bh);
        return &acb->common;
    }
-#endif

    acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
    if (!acb)
@@ -770,10 +763,8 @@
    if (s->fd >= 0) {
        close(s->fd);
        s->fd = -1;
-#if defined(O_DIRECT)
        if (s->aligned_buf != NULL)
            qemu_free(s->aligned_buf);
-#endif
    }
    raw_close_fd_pool(s);
}
@@ -1003,10 +994,12 @@
        open_flags |= O_RDONLY;
        bs->read_only = 1;
    }
-#ifdef O_DIRECT
-    if (flags & BDRV_O_DIRECT)
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if ((flags & BDRV_O_NOCACHE))
        open_flags |= O_DIRECT;
-#endif
+    else if (!(flags & BDRV_O_CACHE_WB))
+        open_flags |= O_DSYNC;

    s->type = FTYPE_FILE;
#if defined(__linux__)

Modified: trunk/block-raw-win32.c
===================================================================
--- trunk/block-raw-win32.c 2008-10-13 21:08:55 UTC (rev 5484)
+++ trunk/block-raw-win32.c 2008-10-14 14:42:54 UTC (rev 5485)
@@ -104,8 +104,10 @@
#else
    overlapped = FILE_ATTRIBUTE_NORMAL;
#endif
-    if (flags & BDRV_O_DIRECT)
+    if ((flags & BDRV_O_NOCACHE))
        overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
+    else if (!(flags & BDRV_O_CACHE_WB))
+        overlapped |= FILE_FLAG_WRITE_THROUGH;
    s->hfile = CreateFile(filename, access_flags,
                          FILE_SHARE_READ, NULL,
                          create_flags, overlapped, NULL);
@@ -440,8 +442,10 @@
#else
    overlapped = FILE_ATTRIBUTE_NORMAL;
#endif
-    if (flags & BDRV_O_DIRECT)
+    if ((flags & BDRV_O_NOCACHE))
        overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
+    else if (!(flags & BDRV_O_CACHE_WB))
+        overlapped |= FILE_FLAG_WRITE_THROUGH;
    s->hfile = CreateFile(filename, access_flags,
                          FILE_SHARE_READ, NULL,
                          create_flags, overlapped, NULL);

Modified: trunk/block.c
===================================================================
--- trunk/block.c 2008-10-13 21:08:55 UTC (rev 5484)
+++ trunk/block.c 2008-10-14 14:42:54 UTC (rev 5485)
@@ -395,12 +395,12 @@
    /* Note: for compatibility, we open disk image files as RDWR, and
       RDONLY as fallback */
    if (!(flags & BDRV_O_FILE))
-        open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT);
+        open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
    else
        open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
    ret = drv->bdrv_open(bs, filename, open_flags);
    if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
-        ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY);
+        ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
        bs->read_only = 1;
    }
    if (ret < 0) {
@@ -427,7 +427,7 @@
        }
        path_combine(backing_filename, sizeof(backing_filename),
                     filename, bs->backing_file);
-        if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
+        if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0)
            goto fail;
    }


Modified: trunk/block.h
===================================================================
--- trunk/block.h 2008-10-13 21:08:55 UTC (rev 5484)
+++ trunk/block.h 2008-10-14 14:42:54 UTC (rev 5485)
@@ -47,8 +47,11 @@
                                     use a disk image format on top of
                                     it (default for
                                     bdrv_file_open()) */
-#define BDRV_O_DIRECT      0x0020
+#define BDRV_O_NOCACHE     0x0020 /* do not use the host page cache */
+#define BDRV_O_CACHE_WB    0x0040 /* use write-back caching */

+#define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)
+
void bdrv_info(void);
void bdrv_info_stats(void);


Modified: trunk/qemu-doc.texi
===================================================================
--- trunk/qemu-doc.texi 2008-10-13 21:08:55 UTC (rev 5484)
+++ trunk/qemu-doc.texi 2008-10-14 14:42:54 UTC (rev 5485)
@@ -267,13 +267,28 @@
@item snapshot=@var{snapshot}
@var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}).
@item cache=@var{cache}
-@var{cache} is "on" or "off" and allows to disable host cache to access data.
+@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data.
@item format=@var{format}
Specify which disk @var{format} will be used rather than detecting
the format.  Can be used to specifiy format=raw to avoid interpreting
an untrusted format header.
@end table

+By default, writethrough caching is used for all block devices.  This means that
+the host page cache will be used to read and write data but write notification
+will be sent to the guest only when the data has been reported as written by
+the storage subsystem.
+
+Writeback caching will report data writes as completed as soon as the data is
+present in the host page cache.  This is safe as long as you trust your host.
+If your host crashes or loses power, then the guest may experience data
+corruption.  When using the @option{-snapshot} option, writeback caching is
+used by default.
+
+The host page cache can be avoided entirely with @option{cache=none}.  This will
+attempt to do disk IO directly to the guest's memory.  QEMU may still perform
+an internal copy of the data.
+
Instead of @option{-cdrom} you can use:
@example
qemu -drive file=file,index=2,media=cdrom

Modified: trunk/qemu-nbd.c
===================================================================
--- trunk/qemu-nbd.c 2008-10-13 21:08:55 UTC (rev 5484)
+++ trunk/qemu-nbd.c 2008-10-14 14:42:54 UTC (rev 5485)
@@ -232,7 +232,7 @@
            flags |= BDRV_O_SNAPSHOT;
            break;
        case 'n':
-            flags |= BDRV_O_DIRECT;
+            flags |= BDRV_O_NOCACHE;
            break;
        case 'b':
            bindto = optarg;

Modified: trunk/vl.c
===================================================================
--- trunk/vl.c 2008-10-13 21:08:55 UTC (rev 5484)
+++ trunk/vl.c 2008-10-14 14:42:54 UTC (rev 5485)
@@ -5648,10 +5648,12 @@
    }

    if (get_param_value(buf, sizeof(buf), "cache", str)) {
-        if (!strcmp(buf, "off"))
+        if (!strcmp(buf, "off") || !strcmp(buf, "none"))
            cache = 0;
-        else if (!strcmp(buf, "on"))
+        else if (!strcmp(buf, "writethrough"))
            cache = 1;
+        else if (!strcmp(buf, "writeback"))
+            cache = 2;
        else {
           fprintf(stderr, "qemu: invalid cache option\n");
           return -1;
@@ -5770,10 +5772,14 @@
    if (!file[0])
        return 0;
    bdrv_flags = 0;
-    if (snapshot)
+    if (snapshot) {
        bdrv_flags |= BDRV_O_SNAPSHOT;
-    if (!cache)
-        bdrv_flags |= BDRV_O_DIRECT;
+        cache = 2; /* always use write-back with snapshot */
+    }
+    if (cache == 0) /* no caching */
+        bdrv_flags |= BDRV_O_NOCACHE;
+    else if (cache == 2) /* write-back */
+        bdrv_flags |= BDRV_O_CACHE_WB;
    if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) {
        fprintf(stderr, "qemu: could not open disk image %s\n",
                        file);
@@ -8145,7 +8151,7 @@
           "-cdrom file     use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
  "-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
           "       [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
-           "       [,cache=on|off][,format=f]\n"
+           "       [,cache=writethrough|writeback|none][,format=f]\n"
  "                use 'file' as a drive image\n"
           "-mtdblock file  use 'file' as on-board Flash memory image\n"
           "-sd file        use 'file' as SecureDigital card image\n"






reply via email to

[Prev in Thread] Current Thread [Next in Thread]