qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v3] block/raw-posix: Try both FIEMAP and SEEK_HOLE


From: Max Reitz
Subject: [Qemu-devel] [PATCH v3] block/raw-posix: Try both FIEMAP and SEEK_HOLE
Date: Wed, 7 May 2014 00:18:40 +0200

The current version of raw-posix always uses ioctl(FS_IOC_FIEMAP) if
FIEMAP is available; lseek with SEEK_HOLE/SEEK_DATA are not even
compiled in in this case. However, there may be implementations which
support the latter but not the former (e.g., NFSv4.2) as well as vice
versa.

To cover both cases, always try SEEK_HOLE/SEEK_DATA (as this will
probably be covered by POSIX soon) and if that does not work, fall back
to FIEMAP; and if that does not work either, treat everything as
allocated.

Signed-off-by: Max Reitz <address@hidden>
---
v3:
 - use a tristate for keeping the information whether or not to use
   FIEMAP and/or SEEK_HOLE/SEEK_DATA [Eric]
 - fall through to another implementation (i.e. FIEMAP) if the first
   (i.e. SEEK_HOLE/SEEK_DATA) reports everything as allocated; this will
   not result in that implementation being skipped the next time,
   however [Eric]
---
 block/raw-posix.c | 182 +++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 131 insertions(+), 51 deletions(-)

diff --git a/block/raw-posix.c b/block/raw-posix.c
index 3ce026d..fd6bac6 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -123,6 +123,18 @@
 
 #define MAX_BLOCKSIZE  4096
 
+/* In case there are multiple implementations for the same feature provided by
+ * the environment, this enumeration may be used to represent the status of
+ * these alternatives. */
+typedef enum ImplementationAlternativeStatus {
+    /* The status is (yet) unknown. */
+    IMPLSTAT_UNKNOWN = 0,
+    /* This implementation is known to return correct results. */
+    IMPLSTAT_WORKING,
+    /* This implementation is known not to return correct results. */
+    IMPLSTAT_SKIP,
+} ImplementationAlternativeStatus;
+
 typedef struct BDRVRawState {
     int fd;
     int type;
@@ -146,6 +158,12 @@ typedef struct BDRVRawState {
     bool has_discard:1;
     bool has_write_zeroes:1;
     bool discard_zeroes:1;
+#if defined SEEK_HOLE && defined SEEK_DATA
+    ImplementationAlternativeStatus seek_hole_status;
+#endif
+#ifdef CONFIG_FIEMAP
+    ImplementationAlternativeStatus fiemap_status;
+#endif
 } BDRVRawState;
 
 typedef struct BDRVRawReopenState {
@@ -1272,98 +1290,160 @@ static int raw_create(const char *filename, 
QEMUOptionParameter *options,
     return result;
 }
 
-/*
- * Returns true iff the specified sector is present in the disk image. Drivers
- * not implementing the functionality are assumed to not support backing files,
- * hence all their sectors are reported as allocated.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- */
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            int nb_sectors, int *pnum)
+static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t end,
+                          off_t *data, off_t *hole, int nb_sectors, int *pnum)
 {
-    off_t start, data, hole;
-    int64_t ret;
-
-    ret = fd_open(bs);
-    if (ret < 0) {
-        return ret;
-    }
-
-    start = sector_num * BDRV_SECTOR_SIZE;
-    ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
-
 #ifdef CONFIG_FIEMAP
-
     BDRVRawState *s = bs->opaque;
+    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
     struct {
         struct fiemap fm;
         struct fiemap_extent fe;
     } f;
 
+    if (s->fiemap_status == IMPLSTAT_SKIP) {
+        return -ENOTSUP;
+    }
+
     f.fm.fm_start = start;
     f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
     f.fm.fm_flags = 0;
     f.fm.fm_extent_count = 1;
     f.fm.fm_reserved = 0;
     if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
-        /* Assume everything is allocated.  */
-        *pnum = nb_sectors;
-        return ret;
+        s->fiemap_status = IMPLSTAT_SKIP;
+        return -errno;
+    }
+
+    if (s->fiemap_status == IMPLSTAT_UNKNOWN) {
+        if (f.fm.fm_extent_count == 1 &&
+            f.fe.fe_logical == 0 && f.fe.fe_length >= end)
+        {
+            /* FIEMAP returned a single extent spanning the entire file; maybe
+             * this was just a default response and therefore we cannot be sure
+             * whether it actually works; fall back to an alternative
+             * implementation. */
+            return -ENOTSUP;
+        } else {
+            s->fiemap_status = IMPLSTAT_WORKING;
+        }
     }
 
     if (f.fm.fm_mapped_extents == 0) {
         /* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
          * f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
          */
-        off_t length = lseek(s->fd, 0, SEEK_END);
-        hole = f.fm.fm_start;
-        data = MIN(f.fm.fm_start + f.fm.fm_length, length);
+        *hole = f.fm.fm_start;
+        *data = MIN(f.fm.fm_start + f.fm.fm_length, end);
     } else {
-        data = f.fe.fe_logical;
-        hole = f.fe.fe_logical + f.fe.fe_length;
+        *data = f.fe.fe_logical;
+        *hole = f.fe.fe_logical + f.fe.fe_length;
         if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
             ret |= BDRV_BLOCK_ZERO;
         }
     }
 
-#elif defined SEEK_HOLE && defined SEEK_DATA
+    return ret;
+#else
+    return -ENOTSUP;
+#endif
+}
 
+static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t end,
+                             off_t *data, off_t *hole, int *pnum)
+{
+#if defined SEEK_HOLE && defined SEEK_DATA
     BDRVRawState *s = bs->opaque;
 
-    hole = lseek(s->fd, start, SEEK_HOLE);
-    if (hole == -1) {
+    if (s->seek_hole_status == IMPLSTAT_SKIP) {
+        return -ENOTSUP;
+    }
+
+    *hole = lseek(s->fd, start, SEEK_HOLE);
+    if (*hole == -1) {
         /* -ENXIO indicates that sector_num was past the end of the file.
          * There is a virtual hole there.  */
         assert(errno != -ENXIO);
 
-        /* Most likely EINVAL.  Assume everything is allocated.  */
-        *pnum = nb_sectors;
-        return ret;
+        s->seek_hole_status = IMPLSTAT_SKIP;
+        return -errno;
     }
 
-    if (hole > start) {
-        data = start;
+    if (s->seek_hole_status == IMPLSTAT_UNKNOWN) {
+        if (*hole >= end) {
+            /* lseek() returned the EOF, therefore it is unknown whether
+             * SEEK_HOLE actually works; fall back to an alternative
+             * implementation. */
+            return -ENOTSUP;
+        } else {
+            s->seek_hole_status = IMPLSTAT_WORKING;
+        }
+    }
+
+    if (*hole > start) {
+        *data = start;
     } else {
         /* On a hole.  We need another syscall to find its end.  */
-        data = lseek(s->fd, start, SEEK_DATA);
-        if (data == -1) {
-            data = lseek(s->fd, 0, SEEK_END);
+        *data = lseek(s->fd, start, SEEK_DATA);
+        if (*data == -1) {
+            *data = lseek(s->fd, 0, SEEK_END);
         }
     }
+
+    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
 #else
-    data = 0;
-    hole = start + nb_sectors * BDRV_SECTOR_SIZE;
+    return -ENOTSUP;
 #endif
+}
+
+/*
+ * Returns true iff the specified sector is present in the disk image. Drivers
+ * not implementing the functionality are assumed to not support backing files,
+ * hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ */
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
+                                                    int64_t sector_num,
+                                                    int nb_sectors, int *pnum)
+{
+    off_t start, end, data = 0, hole = 0;
+    int64_t ret;
+
+    ret = fd_open(bs);
+    if (ret < 0) {
+        return ret;
+    }
+
+    start = sector_num * BDRV_SECTOR_SIZE;
+    end   = bdrv_getlength(bs);
+    if (end < 0) {
+        /* This function is used solely by "file" which does not have
+         * variable-length images; therefore, bdrv_getlength() should use
+         * bs->total_sectors for calculating the length and never return an
+         * error; if it does, it will be -ENOMEDIUM which should be returned
+         * to the caller. */
+        return end;
+    }
+
+    ret = try_seek_hole(bs, start, end, &data, &hole, pnum);
+    if (ret < 0) {
+        ret = try_fiemap(bs, start, end, &data, &hole, nb_sectors, pnum);
+        if (ret < 0) {
+            /* Assume everything is allocated. */
+            data = 0;
+            hole = start + nb_sectors * BDRV_SECTOR_SIZE;
+            ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+        }
+    }
 
     if (data <= start) {
         /* On a data extent, compute sectors to the end of the extent.  */
-- 
1.9.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]