qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [4599] Align file accesses with cache=off (O_DIRECT) (Kevin


From: Fabrice Bellard
Subject: [Qemu-devel] [4599] Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier)
Date: Wed, 28 May 2008 09:51:10 +0000

Revision: 4599
          http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4599
Author:   bellard
Date:     2008-05-28 09:51:09 +0000 (Wed, 28 May 2008)

Log Message:
-----------
Align file accesses with cache=off (O_DIRECT) (Kevin Wolf, Laurent Vivier)

Modified Paths:
--------------
    trunk/block-raw-posix.c

Modified: trunk/block-raw-posix.c
===================================================================
--- trunk/block-raw-posix.c     2008-05-27 21:18:39 UTC (rev 4598)
+++ trunk/block-raw-posix.c     2008-05-28 09:51:09 UTC (rev 4599)
@@ -70,6 +70,8 @@
 #define FTYPE_CD     1
 #define FTYPE_FD     2
 
+#define ALIGNED_BUFFER_SIZE (32 * 512)
+
 /* if the FD is not accessed during that time (in ms), we try to
    reopen it to see if the disk has been changed */
 #define FD_OPEN_TIMEOUT 1000
@@ -86,6 +88,9 @@
     int fd_got_error;
     int fd_media_changed;
 #endif
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+    uint8_t* aligned_buf;
+#endif
 } BDRVRawState;
 
 static int fd_open(BlockDriverState *bs);
@@ -121,6 +126,17 @@
         return ret;
     }
     s->fd = fd;
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+    s->aligned_buf = NULL;
+    if (flags & BDRV_O_DIRECT) {
+        s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
+        if (s->aligned_buf == NULL) {
+            ret = -errno;
+            close(fd);
+            return ret;
+        }
+    }
+#endif
     return 0;
 }
 
@@ -141,7 +157,14 @@
 #endif
 */
 
-static int raw_pread(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pread_aligned(BlockDriverState *bs, int64_t offset,
                      uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
@@ -194,7 +217,14 @@
     return ret;
 }
 
-static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+/*
+ * offset and count are in bytes, but must be multiples of 512 for files
+ * opened with O_DIRECT. buf must be aligned to 512 bytes then.
+ *
+ * This function may be called without alignment if the caller ensures
+ * that O_DIRECT is not in effect.
+ */
+static int raw_pwrite_aligned(BlockDriverState *bs, int64_t offset,
                       const uint8_t *buf, int count)
 {
     BDRVRawState *s = bs->opaque;
@@ -230,6 +260,164 @@
     return ret;
 }
 
+
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, necessary alignments are ensured before calling
+ * raw_pread_aligned to do the actual read.
+ */
+static int raw_pread(BlockDriverState *bs, int64_t offset,
+                     uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int size, ret, shift, sum;
+
+    sum = 0;
+
+    if (s->aligned_buf != NULL)  {
+
+        if (offset & 0x1ff) {
+            /* align offset on a 512 bytes boundary */
+
+            shift = offset & 0x1ff;
+            size = (shift + count + 0x1ff) & ~0x1ff;
+            if (size > ALIGNED_BUFFER_SIZE)
+                size = ALIGNED_BUFFER_SIZE;
+            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, size);
+            if (ret < 0)
+                return ret;
+
+            size = 512 - shift;
+            if (size > count)
+                size = count;
+            memcpy(buf, s->aligned_buf + shift, size);
+
+            buf += size;
+            offset += size;
+            count -= size;
+            sum += size;
+
+            if (count == 0)
+                return sum;
+        }
+        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+
+            /* read on aligned buffer */
+
+            while (count) {
+
+                size = (count + 0x1ff) & ~0x1ff;
+                if (size > ALIGNED_BUFFER_SIZE)
+                    size = ALIGNED_BUFFER_SIZE;
+
+                ret = raw_pread_aligned(bs, offset, s->aligned_buf, size);
+                if (ret < 0)
+                    return ret;
+
+                size = ret;
+                if (size > count)
+                    size = count;
+
+                memcpy(buf, s->aligned_buf, size);
+
+                buf += size;
+                offset += size;
+                count -= size;
+                sum += size;
+            }
+
+            return sum;
+        }
+    }
+
+    return raw_pread_aligned(bs, offset, buf, count) + sum;
+}
+
+/*
+ * offset and count are in bytes and possibly not aligned. For files opened
+ * with O_DIRECT, necessary alignments are ensured before calling
+ * raw_pwrite_aligned to do the actual write.
+ */
+static int raw_pwrite(BlockDriverState *bs, int64_t offset,
+                      const uint8_t *buf, int count)
+{
+    BDRVRawState *s = bs->opaque;
+    int size, ret, shift, sum;
+
+    sum = 0;
+
+    if (s->aligned_buf != NULL) {
+
+        if (offset & 0x1ff) {
+            /* align offset on a 512 bytes boundary */
+            shift = offset & 0x1ff;
+            ret = raw_pread_aligned(bs, offset - shift, s->aligned_buf, 512);
+            if (ret < 0)
+                return ret;
+
+            size = 512 - shift;
+            if (size > count)
+                size = count;
+            memcpy(s->aligned_buf + shift, buf, size);
+
+            ret = raw_pwrite_aligned(bs, offset - shift, s->aligned_buf, 512);
+            if (ret < 0)
+                return ret;
+
+            buf += size;
+            offset += size;
+            count -= size;
+            sum += size;
+
+            if (count == 0)
+                return sum;
+        }
+        if (count & 0x1ff || (uintptr_t) buf & 0x1ff) {
+
+            while ((size = (count & ~0x1ff)) != 0) {
+
+                if (size > ALIGNED_BUFFER_SIZE)
+                    size = ALIGNED_BUFFER_SIZE;
+
+                memcpy(s->aligned_buf, buf, size);
+
+                ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, size);
+                if (ret < 0)
+                    return ret;
+
+                buf += ret;
+                offset += ret;
+                count -= ret;
+                sum += ret;
+            }
+            /* here, count < 512 because (count & ~0x1ff) == 0 */
+            if (count) {
+                ret = raw_pread_aligned(bs, offset, s->aligned_buf, 512);
+                if (ret < 0)
+                    return ret;
+                 memcpy(s->aligned_buf, buf, count);
+
+                 ret = raw_pwrite_aligned(bs, offset, s->aligned_buf, 512);
+                 if (ret < 0)
+                     return ret;
+                 if (count < ret)
+                     ret = count;
+
+                 sum += ret;
+            }
+            return sum;
+        }
+    }
+    return raw_pwrite_aligned(bs, offset, buf, count) + sum;
+}
+
+#else
+#define raw_pread raw_pread_aligned
+#define raw_pwrite raw_pwrite_aligned
+#endif
+
+
 /***********************************************************/
 /* Unix AIO using POSIX AIO */
 
@@ -237,6 +425,7 @@
     BlockDriverAIOCB common;
     struct aiocb aiocb;
     struct RawAIOCB *next;
+    int ret;
 } RawAIOCB;
 
 static int aio_sig_num = SIGUSR2;
@@ -397,12 +586,38 @@
     return acb;
 }
 
+#ifndef QEMU_IMG
+static void raw_aio_em_cb(void* opaque)
+{
+    RawAIOCB *acb = opaque;
+    acb->common.cb(acb->common.opaque, acb->ret);
+    qemu_aio_release(acb);
+}
+#endif
+
 static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
         int64_t sector_num, uint8_t *buf, int nb_sectors,
         BlockDriverCompletionFunc *cb, void *opaque)
 {
     RawAIOCB *acb;
 
+    /*
+     * If O_DIRECT is used and the buffer is not aligned fall back
+     * to synchronous IO.
+     */
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+    BDRVRawState *s = bs->opaque;
+
+    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
+        QEMUBH *bh;
+        acb = qemu_aio_get(bs, cb, opaque);
+        acb->ret = raw_pread(bs, 512 * sector_num, buf, 512 * nb_sectors);
+        bh = qemu_bh_new(raw_aio_em_cb, acb);
+        qemu_bh_schedule(bh);
+        return &acb->common;
+    }
+#endif
+
     acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
@@ -419,6 +634,23 @@
 {
     RawAIOCB *acb;
 
+    /*
+     * If O_DIRECT is used and the buffer is not aligned fall back
+     * to synchronous IO.
+     */
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+    BDRVRawState *s = bs->opaque;
+
+    if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
+        QEMUBH *bh;
+        acb = qemu_aio_get(bs, cb, opaque);
+        acb->ret = raw_pwrite(bs, 512 * sector_num, buf, 512 * nb_sectors);
+        bh = qemu_bh_new(raw_aio_em_cb, acb);
+        qemu_bh_schedule(bh);
+        return &acb->common;
+    }
+#endif
+
     acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
     if (!acb)
         return NULL;
@@ -462,6 +694,10 @@
     if (s->fd >= 0) {
         close(s->fd);
         s->fd = -1;
+#if defined(O_DIRECT) && !defined(QEMU_IMG)
+        if (s->aligned_buf != NULL)
+            qemu_free(s->aligned_buf);
+#endif
     }
 }
 






reply via email to

[Prev in Thread] Current Thread [Next in Thread]