Re: [Qemu-devel] [PATCHv2 RESEND] block/iscsi: speed up read for unallocated sectors


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCHv2 RESEND] block/iscsi: speed up read for unallocated sectors
Date: Mon, 28 Apr 2014 15:22:11 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.4.0

On 28/04/2014 13:11, Peter Lieven wrote:
diff --git a/block/iscsi.c b/block/iscsi.c
index b490e98..9f5b4a0 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -30,6 +30,8 @@
 #include "qemu-common.h"
 #include "qemu/config-file.h"
 #include "qemu/error-report.h"
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
 #include "block/block_int.h"
 #include "trace.h"
 #include "block/scsi.h"
@@ -59,6 +61,8 @@ typedef struct IscsiLun {
     struct scsi_inquiry_logical_block_provisioning lbp;
     struct scsi_inquiry_block_limits bl;
     unsigned char *zeroblock;
+    unsigned long *allocationmap;
+    int cluster_sectors;
 } IscsiLun;

 typedef struct IscsiTask {
@@ -91,6 +95,7 @@ typedef struct IscsiAIOCB {
 #define NOP_INTERVAL 5000
 #define MAX_NOP_FAILURES 3
 #define ISCSI_CMD_RETRIES 5
+#define ISCSI_CHECKALLOC_THRES 63

One thing that crossed my mind: you have a threshold of 64 sectors (i.e. 32K), which is a nice power of two and usually smaller than the minimum "interesting" opt_unmap_gran (64K). So perhaps one of the following is true:

a) the threshold should be 128

b) the allocationmap should be allocated even for out-of-range opt_unmap_gran, using a granularity of 64 sectors in that case.

c) an opt_unmap_gran of 32K should be used by QEMU as a cluster size

d) the threshold should be dynamic and equal to iscsilun->cluster_sectors

e) c+d are both true

If you respin for any of the above changes, please change the uses of ISCSI_CHECKALLOC_THRES to compare with >=. It would simplify the logic a bit, and the non-power-of-two value makes people scratch their heads. Otherwise, please post a patch to be applied on top. For now, I'm holding this patch.
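
For illustration, (d) combined with the >= comparison could look roughly like this in iscsi_co_readv() (an untested sketch to show the shape of the change, not part of the posted patch):

    /* Sketch of (d): tie the threshold to the guessed cluster size.  If no
     * cluster size was detected, iscsilun->allocationmap stays NULL and
     * iscsi_allocationmap_is_allocated() returns true, so this branch is
     * skipped regardless of the threshold value. */
    if (iscsilun->lbprz && nb_sectors >= iscsilun->cluster_sectors &&
        !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
        int64_t ret;
        int pnum;
        ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
        if (ret < 0) {
            return ret;
        }
        if ((ret & BDRV_BLOCK_ZERO) && pnum >= nb_sectors) {
            qemu_iovec_memset(iov, 0, 0x00, iov->size);
            return 0;
        }
    }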

Paolo

 static void
 iscsi_bh_cb(void *p)
@@ -273,6 +278,32 @@ static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
     return 1;
 }

+static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
+                                    int nb_sectors)
+{
+    if (iscsilun->allocationmap == NULL) {
+        return;
+    }
+    bitmap_set(iscsilun->allocationmap,
+               sector_num / iscsilun->cluster_sectors,
+               DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
+}
+
+static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
+                                      int nb_sectors)
+{
+    int64_t cluster_num, nb_clusters;
+    if (iscsilun->allocationmap == NULL) {
+        return;
+    }
+    cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
+    nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
+                  - cluster_num;
+    if (nb_clusters > 0) {
+        bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
+    }
+}
+
 static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
                                         int64_t sector_num, int nb_sectors,
                                         QEMUIOVector *iov)
@@ -336,9 +367,127 @@ retry:
         return -EIO;
     }

+    iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+
     return 0;
 }

+
+static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
+                                             int64_t sector_num, int nb_sectors)
+{
+    unsigned long size;
+    if (iscsilun->allocationmap == NULL) {
+        return true;
+    }
+    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+    return !(find_next_bit(iscsilun->allocationmap, size,
+                           sector_num / iscsilun->cluster_sectors) == size);
+}
+
+
+#if defined(LIBISCSI_FEATURE_IOVECTOR)
+
+static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
+                                                  int64_t sector_num,
+                                                  int nb_sectors, int *pnum)
+{
+    IscsiLun *iscsilun = bs->opaque;
+    struct scsi_get_lba_status *lbas = NULL;
+    struct scsi_lba_status_descriptor *lbasd = NULL;
+    struct IscsiTask iTask;
+    int64_t ret;
+
+    iscsi_co_init_iscsitask(iscsilun, &iTask);
+
+    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* default to all sectors allocated */
+    ret = BDRV_BLOCK_DATA;
+    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
+    *pnum = nb_sectors;
+
+    /* LUN does not support logical block provisioning */
+    if (iscsilun->lbpme == 0) {
+        goto out;
+    }
+
+retry:
+    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
+                                  sector_qemu2lun(sector_num, iscsilun),
+                                  8 + 16, iscsi_co_generic_cb,
+                                  &iTask) == NULL) {
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    while (!iTask.complete) {
+        iscsi_set_events(iscsilun);
+        qemu_coroutine_yield();
+    }
+
+    if (iTask.do_retry) {
+        if (iTask.task != NULL) {
+            scsi_free_scsi_task(iTask.task);
+            iTask.task = NULL;
+        }
+        iTask.complete = 0;
+        goto retry;
+    }
+
+    if (iTask.status != SCSI_STATUS_GOOD) {
+        /* in case the get_lba_status_callout fails (i.e.
+         * because the device is busy or the cmd is not
+         * supported) we pretend all blocks are allocated
+         * for backwards compatibility */
+        goto out;
+    }
+
+    lbas = scsi_datain_unmarshall(iTask.task);
+    if (lbas == NULL) {
+        ret = -EIO;
+        goto out;
+    }
+
+    lbasd = &lbas->descriptors[0];
+
+    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
+        ret = -EIO;
+        goto out;
+    }
+
+    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
+
+    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
+        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
+        ret &= ~BDRV_BLOCK_DATA;
+        if (iscsilun->lbprz) {
+            ret |= BDRV_BLOCK_ZERO;
+        }
+    }
+
+    if (ret & BDRV_BLOCK_ZERO) {
+        iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
+    } else {
+        iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
+    }
+
+    if (*pnum > nb_sectors) {
+        *pnum = nb_sectors;
+    }
+out:
+    if (iTask.task != NULL) {
+        scsi_free_scsi_task(iTask.task);
+    }
+    return ret;
+}
+
+#endif /* LIBISCSI_FEATURE_IOVECTOR */
+
+
 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
                                        int64_t sector_num, int nb_sectors,
                                        QEMUIOVector *iov)
@@ -355,6 +504,22 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
         return -EINVAL;
     }

+#if defined(LIBISCSI_FEATURE_IOVECTOR)
+    if (iscsilun->lbprz && nb_sectors > ISCSI_CHECKALLOC_THRES &&
+        !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
+        int64_t ret;
+        int pnum;
+        ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
+        if (ret < 0) {
+            return ret;
+        }
+        if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
+            qemu_iovec_memset(iov, 0, 0x00, iov->size);
+            return 0;
+        }
+    }
+#endif
+
     lba = sector_qemu2lun(sector_num, iscsilun);
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);

@@ -639,101 +804,6 @@ iscsi_getlength(BlockDriverState *bs)
     return len;
 }

-#if defined(LIBISCSI_FEATURE_IOVECTOR)
-
-static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
-                                                  int64_t sector_num,
-                                                  int nb_sectors, int *pnum)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct scsi_get_lba_status *lbas = NULL;
-    struct scsi_lba_status_descriptor *lbasd = NULL;
-    struct IscsiTask iTask;
-    int64_t ret;
-
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
-
-    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    /* default to all sectors allocated */
-    ret = BDRV_BLOCK_DATA;
-    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
-    *pnum = nb_sectors;
-
-    /* LUN does not support logical block provisioning */
-    if (iscsilun->lbpme == 0) {
-        goto out;
-    }
-
-retry:
-    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
-                                  sector_qemu2lun(sector_num, iscsilun),
-                                  8 + 16, iscsi_co_generic_cb,
-                                  &iTask) == NULL) {
-        ret = -ENOMEM;
-        goto out;
-    }
-
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_coroutine_yield();
-    }
-
-    if (iTask.do_retry) {
-        if (iTask.task != NULL) {
-            scsi_free_scsi_task(iTask.task);
-            iTask.task = NULL;
-        }
-        iTask.complete = 0;
-        goto retry;
-    }
-
-    if (iTask.status != SCSI_STATUS_GOOD) {
-        /* in case the get_lba_status_callout fails (i.e.
-         * because the device is busy or the cmd is not
-         * supported) we pretend all blocks are allocated
-         * for backwards compatibility */
-        goto out;
-    }
-
-    lbas = scsi_datain_unmarshall(iTask.task);
-    if (lbas == NULL) {
-        ret = -EIO;
-        goto out;
-    }
-
-    lbasd = &lbas->descriptors[0];
-
-    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
-        ret = -EIO;
-        goto out;
-    }
-
-    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
-    if (*pnum > nb_sectors) {
-        *pnum = nb_sectors;
-    }
-
-    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
-        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
-        ret &= ~BDRV_BLOCK_DATA;
-        if (iscsilun->lbprz) {
-            ret |= BDRV_BLOCK_ZERO;
-        }
-    }
-
-out:
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-    }
-    return ret;
-}
-
-#endif /* LIBISCSI_FEATURE_IOVECTOR */
-
 static int
 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
                                    int nb_sectors)
@@ -787,6 +857,8 @@ retry:
         return -EIO;
     }

+    iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+
     return 0;
 }

@@ -859,6 +931,12 @@ retry:
         return -EIO;
     }

+    if (flags & BDRV_REQ_MAY_UNMAP) {
+        iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+    } else {
+        iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+    }
+
     return 0;
 }

@@ -1288,6 +1366,22 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
     timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
 #endif

+    /* Guess the internal cluster (page) size of the iscsi target by the means
+     * of opt_unmap_gran. Transfer the unmap granularity only if it has a
+     * reasonable size */
+    if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 64 * 1024 &&
+        iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
+        iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
+                                     iscsilun->block_size) >> BDRV_SECTOR_BITS;
+#if defined(LIBISCSI_FEATURE_IOVECTOR)
+        if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
+            iscsilun->allocationmap =
+                bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+                                        iscsilun->cluster_sectors));
+        }
+#endif
+    }
+
 out:
     qemu_opts_del(opts);
     if (initiator_name != NULL) {
@@ -1321,6 +1415,7 @@ static void iscsi_close(BlockDriverState *bs)
     qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
     iscsi_destroy_context(iscsi);
     g_free(iscsilun->zeroblock);
+    g_free(iscsilun->allocationmap);
     memset(iscsilun, 0, sizeof(IscsiLun));
 }

@@ -1379,6 +1474,13 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
         return -EINVAL;
     }

+    if (iscsilun->allocationmap != NULL) {
+        g_free(iscsilun->allocationmap);
+        iscsilun->allocationmap =
+            bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+                                    iscsilun->cluster_sectors));
+    }
+
     return 0;
 }

@@ -1441,13 +1543,7 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
     IscsiLun *iscsilun = bs->opaque;
     bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
     bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
-    /* Guess the internal cluster (page) size of the iscsi target by the means
-     * of opt_unmap_gran. Transfer the unmap granularity only if it has a
-     * reasonable size for bdi->cluster_size */
-    if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 64 * 1024 &&
-        iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
-        bdi->cluster_size = iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
-    }
+    bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
     return 0;
 }