---
block/raw-posix.c | 182 +++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 131 insertions(+), 51 deletions(-)
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 3ce026d..fd6bac6 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -123,6 +123,18 @@
#define MAX_BLOCKSIZE 4096
+/* Probe state of one of several interchangeable environment facilities
+ * (in this file: SEEK_HOLE/SEEK_DATA and FIEMAP); BDRVRawState keeps one
+ * such value per facility so that a bad one is not retried on every call. */
+typedef enum ImplementationAlternativeStatus {
+ /* Nothing conclusive seen yet; value 0, i.e. the state of zeroed memory. */
+ IMPLSTAT_UNKNOWN = 0,
+ /* A call returned a result that cannot be a mere default: trust it. */
+ IMPLSTAT_WORKING,
+ /* A call failed; this implementation is not tried again. */
+ IMPLSTAT_SKIP,
+} ImplementationAlternativeStatus;
+
typedef struct BDRVRawState {
int fd;
int type;
@@ -146,6 +158,12 @@ typedef struct BDRVRawState {
bool has_discard:1;
bool has_write_zeroes:1;
bool discard_zeroes:1;
+#if defined SEEK_HOLE && defined SEEK_DATA
+ ImplementationAlternativeStatus seek_hole_status;
+#endif
+#ifdef CONFIG_FIEMAP
+ ImplementationAlternativeStatus fiemap_status;
+#endif
} BDRVRawState;
typedef struct BDRVRawReopenState {
@@ -1272,98 +1290,160 @@ static int raw_create(const char *filename,
QEMUOptionParameter *options,
return result;
}
-/*
- * Returns true iff the specified sector is present in the disk image. Drivers
- * not implementing the functionality are assumed to not support backing files,
- * hence all their sectors are reported as allocated.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- */
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
+/*
+ * Look up the extent containing or following 'start' with the FS_IOC_FIEMAP
+ * ioctl.
+ *
+ * bs:         block device; bs->opaque is the BDRVRawState holding the fd
+ * start:      byte offset at which the query begins
+ * end:        length of the image in bytes
+ * data, hole: outputs. If an extent was mapped, *data is its start and *hole
+ *             the offset right after it. If none was mapped, *hole is 'start'
+ *             and *data is the end of the queried range, clamped to 'end'.
+ * nb_sectors: length of the queried range in sectors
+ * pnum:       not used by this helper
+ *
+ * Returns BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start on success, with
+ * BDRV_BLOCK_ZERO added when the extent is flagged FIEMAP_EXTENT_UNWRITTEN.
+ * Returns -errno if the ioctl fails (FIEMAP is then skipped from now on) and
+ * -ENOTSUP if FIEMAP is not compiled in, has been marked IMPLSTAT_SKIP, or
+ * has so far only produced an answer that proves nothing.
+ */
+static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t end,
+ off_t *data, off_t *hole, int nb_sectors, int *pnum)
{
- off_t start, data, hole;
- int64_t ret;
-
- ret = fd_open(bs);
- if (ret < 0) {
- return ret;
- }
-
- start = sector_num * BDRV_SECTOR_SIZE;
- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
-
#ifdef CONFIG_FIEMAP
-
BDRVRawState *s = bs->opaque;
+ int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
struct {
struct fiemap fm;
struct fiemap_extent fe;
} f;
+ if (s->fiemap_status == IMPLSTAT_SKIP) {
+ return -ENOTSUP;
+ }
+
f.fm.fm_start = start;
f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
f.fm.fm_flags = 0;
f.fm.fm_extent_count = 1;
f.fm.fm_reserved = 0;
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
- /* Assume everything is allocated. */
- *pnum = nb_sectors;
- return ret;
+ s->fiemap_status = IMPLSTAT_SKIP;
+ return -errno;
+ }
+
+ if (s->fiemap_status == IMPLSTAT_UNKNOWN) {
+ /* fm_extent_count is the array size we passed in (always 1 here); the
+ * number of extents actually filled in is fm_mapped_extents, and f.fe
+ * holds valid data only when that is non-zero. */
+ if (f.fm.fm_mapped_extents == 1 &&
+ f.fe.fe_logical == 0 && f.fe.fe_length >= end)
+ {
+ /* FIEMAP returned a single extent spanning the entire file; maybe
+ * this was just a default response and therefore we cannot be sure
+ * whether it actually works; fall back to an alternative
+ * implementation. */
+ return -ENOTSUP;
+ } else {
+ s->fiemap_status = IMPLSTAT_WORKING;
+ }
}
if (f.fm.fm_mapped_extents == 0) {
/* No extents found, data is beyond f.fm.fm_start + f.fm.fm_length.
* f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
*/
- off_t length = lseek(s->fd, 0, SEEK_END);
- hole = f.fm.fm_start;
- data = MIN(f.fm.fm_start + f.fm.fm_length, length);
+ *hole = f.fm.fm_start;
+ *data = MIN(f.fm.fm_start + f.fm.fm_length, end);
} else {
- data = f.fe.fe_logical;
- hole = f.fe.fe_logical + f.fe.fe_length;
+ *data = f.fe.fe_logical;
+ *hole = f.fe.fe_logical + f.fe.fe_length;
if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
ret |= BDRV_BLOCK_ZERO;
}
}
-#elif defined SEEK_HOLE && defined SEEK_DATA
+ return ret;
+#else
+ return -ENOTSUP;
+#endif
+}
+static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t end,
+ off_t *data, off_t *hole, int *pnum)
+{ /* Extent lookup via lseek(SEEK_HOLE/SEEK_DATA); 'pnum' is not used here. */
+#if defined SEEK_HOLE && defined SEEK_DATA
BDRVRawState *s = bs->opaque;
- hole = lseek(s->fd, start, SEEK_HOLE);
- if (hole == -1) {
+ if (s->seek_hole_status == IMPLSTAT_SKIP) { /* an earlier lseek() failed */
+ return -ENOTSUP;
+ }
+
+ *hole = lseek(s->fd, start, SEEK_HOLE); /* first hole at or after 'start' */
+ if (*hole == -1) {
/* -ENXIO indicates that sector_num was past the end of the file.
* There is a virtual hole there. */
assert(errno != -ENXIO);
- /* Most likely EINVAL. Assume everything is allocated. */
- *pnum = nb_sectors;
- return ret;
+ s->seek_hole_status = IMPLSTAT_SKIP; /* any failure disables SEEK_HOLE */
+ return -errno; /* NOTE(review): assert above tests -ENXIO, errno is > 0 */
}
- if (hole > start) {
- data = start;
+ if (s->seek_hole_status == IMPLSTAT_UNKNOWN) { /* still being probed */
+ if (*hole >= end) {
+ /* The only hole reported is the one at the image end, which an
+ * lseek() without real hole support would report as well; nothing
+ * is proven yet, so let the caller try an alternative. */
+ return -ENOTSUP;
+ } else {
+ s->seek_hole_status = IMPLSTAT_WORKING; /* hole before the end */
+ }
+ }
+
+ if (*hole > start) { /* 'start' itself lies in data */
+ *data = start;
} else {
/* On a hole. We need another syscall to find its end. */
- data = lseek(s->fd, start, SEEK_DATA);
- if (data == -1) {
- data = lseek(s->fd, 0, SEEK_END);
+ *data = lseek(s->fd, start, SEEK_DATA);
+ if (*data == -1) { /* SEEK_DATA failed: use the file end as data start */
+ *data = lseek(s->fd, 0, SEEK_END);
}
}
+
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; /* success */
#else
- data = 0;
- hole = start + nb_sectors * BDRV_SECTOR_SIZE;
+ return -ENOTSUP; /* SEEK_HOLE/SEEK_DATA not defined at build time */
#endif
+}
+
+/*
+ * Returns true iff the specified sector is present in the disk image. Drivers
+ * not implementing the functionality are assumed to not support backing files,
+ * hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ */
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ off_t start, end, data = 0, hole = 0;
+ int64_t ret;
+
+ ret = fd_open(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ start = sector_num * BDRV_SECTOR_SIZE;
+ end = bdrv_getlength(bs);
+ if (end < 0) {
+ /* This function is used solely by "file" which does not have
+ * variable-length images; therefore, bdrv_getlength() should use
+ * bs->total_sectors for calculating the length and never return an
+ * error; if it does, it will be -ENOMEDIUM which should be returned
+ * to the caller. */
+ return end;
+ }
+
+ ret = try_seek_hole(bs, start, end, &data, &hole, pnum);
+ if (ret < 0) {
+ ret = try_fiemap(bs, start, end, &data, &hole, nb_sectors, pnum);
+ if (ret < 0) {
+ /* Assume everything is allocated. */
+ data = 0;
+ hole = start + nb_sectors * BDRV_SECTOR_SIZE;
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+ }
+ }
if (data <= start) {
/* On a data extent, compute sectors to the end of the extent. */