From: Pavel Butsykin
Subject: [Qemu-block] [PATCH RFC 17/22] block/pcache: skip readahead for non-sequential requests
Date: Thu, 25 Aug 2016 16:44:16 +0300
When data is read randomly, a lot of readahead is generated, resulting in
a loss of performance. To avoid this, the request stream is checked for
sequentiality before readahead is performed. It also makes no sense to
cache these requests, because a cache hit on this data is very unlikely.
Signed-off-by: Pavel Butsykin <address@hidden>
---
block/pcache.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 138 insertions(+), 3 deletions(-)
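The check itself is simple: readahead for a request starting at sector S
pays off only if the readahead-sized window immediately preceding S was
itself requested recently. Below is a minimal standalone sketch of that
idea, assuming a plain ring buffer of recent window offsets in place of
the patch's rbtree/LRU statistics pool; record_request(), was_requested()
and should_readahead() are illustrative names that do not appear in the
patch.

/* Sequentiality-check sketch: remember recent request windows and
 * allow readahead only when the immediately preceding window was
 * also read. Standalone, no QEMU dependencies. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RA_SECTORS   256   /* readahead window ("cache line") in sectors */
#define HISTORY_SIZE 64    /* recent request windows remembered */

static uint64_t req_history[HISTORY_SIZE];
static unsigned req_count;

/* Remember a request window by its starting sector. */
static void record_request(uint64_t sector_num)
{
    req_history[req_count++ % HISTORY_SIZE] = sector_num;
}

/* Was a window starting at sector_num seen recently? */
static bool was_requested(uint64_t sector_num)
{
    unsigned n = req_count < HISTORY_SIZE ? req_count : HISTORY_SIZE;
    for (unsigned i = 0; i < n; i++) {
        if (req_history[i] == sector_num) {
            return true;
        }
    }
    return false;
}

/* Readahead only pays off for sequential streams: require that the
 * window immediately before this request was itself read. */
static bool should_readahead(uint64_t sector_num)
{
    if (sector_num < RA_SECTORS) {
        return false;             /* no preceding window exists */
    }
    return was_requested(sector_num - RA_SECTORS);
}

int main(void)
{
    record_request(0);
    record_request(RA_SECTORS);   /* sequential continuation */
    printf("sequential: %d\n", should_readahead(2 * RA_SECTORS));  /* 1 */
    printf("random:     %d\n", should_readahead(10 * RA_SECTORS)); /* 0 */
    return 0;
}

Compiled as-is, this prints 1 for the sequential probe and 0 for the
random one; check_lreq_sequence() in the diff below implements the same
test against the lreq tree.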
diff --git a/block/pcache.c b/block/pcache.c
index ae7ac8d..7a317fc 100644
--- a/block/pcache.c
+++ b/block/pcache.c
@@ -73,6 +73,10 @@ typedef struct PCNode {
CoMutex lock;
} PCNode;
+typedef struct LRNode {
+ BlockNode cm;
+} LRNode;
+
typedef struct ReqStor {
struct {
struct RbRoot root;
@@ -91,10 +95,12 @@ typedef struct BDRVPCacheState {
BlockDriverState **bs;
ReqStor pcache;
+ ReqStor lreq;
struct {
uint32_t cache_size;
uint32_t readahead_size;
+ uint32_t lreq_pool_size;
} cfg;
#ifdef PCACHE_DEBUG
@@ -166,6 +172,7 @@ static QemuOptsList runtime_opts = {
#define MB_BITS 20
#define PCACHE_DEFAULT_CACHE_SIZE (4 << MB_BITS)
#define PCACHE_DEFAULT_READAHEAD_SIZE (128 << KB_BITS)
+#define PCACHE_DEFAULT_POOL_STAT_SIZE (1 << MB_BITS)
enum {
NODE_SUCCESS_STATUS = 0,
@@ -181,6 +188,7 @@ enum {
};
#define PCNODE(_n) ((PCNode *)(_n))
+#define LRNODE(_n) ((LRNode *)(_n))
static inline void pcache_node_unref(BDRVPCacheState *s, PCNode *node)
{
@@ -262,6 +270,11 @@ static PCNode *pcache_node_search(struct RbRoot *root, RbNodeKey *key)
return node == NULL ? NULL : pcache_node_ref(node);
}
+static inline LRNode *lreq_node_search(struct RbRoot *root, RbNodeKey *key)
+{
+ return node_search(root, key);
+}
+
static void *node_insert(struct RbRoot *root, BlockNode *node)
{
struct RbNode **new = &(root->rb_node), *parent = NULL;
@@ -288,6 +301,11 @@ static inline PCNode *pcache_node_insert(struct RbRoot *root, PCNode *node)
return pcache_node_ref(node_insert(root, &node->cm));
}
+static inline LRNode *lreq_node_insert(struct RbRoot *root, LRNode *node)
+{
+ return node_insert(root, &node->cm);
+}
+
static inline void *pcache_node_alloc(RbNodeKey* key)
{
PCNode *node = g_slice_alloc(sizeof(*node));
@@ -364,6 +382,34 @@ static void pcache_try_shrink(BDRVPCacheState *s)
}
}
+static void lreq_try_shrink(BDRVPCacheState *s)
+{
+ while (s->lreq.curr_size > s->cfg.lreq_pool_size) {
+ LRNode *rmv_node;
+ /* XXX: need to filter large requests */
+ if (QTAILQ_EMPTY(&s->lreq.lru.list)) {
+ DPRINTF("lru lreq list is empty, but curr_size: %d\n",
+ s->lreq.curr_size);
+ break;
+ }
+
+ qemu_co_mutex_lock(&s->lreq.lru.lock);
+ rmv_node = LRNODE(QTAILQ_LAST(&s->lreq.lru.list, lru_head));
+ qemu_co_mutex_unlock(&s->lreq.lru.lock);
+
+ atomic_sub(&s->lreq.curr_size, rmv_node->cm.nb_sectors);
+
+ qemu_co_mutex_lock(&s->lreq.lru.lock);
+ QTAILQ_REMOVE(&s->lreq.lru.list, &rmv_node->cm, entry);
+ qemu_co_mutex_unlock(&s->lreq.lru.lock);
+
+ qemu_co_mutex_lock(&s->lreq.tree.lock);
+ rb_erase(&rmv_node->cm.rb_node, &s->lreq.tree.root);
+ qemu_co_mutex_unlock(&s->lreq.tree.lock);
+ g_slice_free1(sizeof(*rmv_node), rmv_node);
+ }
+}
+
static PrefCachePartReq *pcache_req_get(PrefCacheAIOCB *acb, PCNode *node)
{
PrefCachePartReq *req = g_slice_alloc(sizeof(*req));
@@ -437,6 +483,34 @@ static inline PCNode *pcache_node_add(PrefCacheAIOCB *acb, RbNodeKey *key)
return node;
}
+static LRNode *lreq_node_add(PrefCacheAIOCB *acb, RbNodeKey *key)
+{
+ BDRVPCacheState *s = acb->s;
+ LRNode *new_node = g_slice_alloc(sizeof(*new_node));
+ LRNode *found;
+
+ new_node->cm.sector_num = key->num;
+ new_node->cm.nb_sectors = key->size;
+
+ qemu_co_mutex_lock(&s->lreq.tree.lock);
+ found = lreq_node_insert(&s->lreq.tree.root, new_node);
+ qemu_co_mutex_unlock(&s->lreq.tree.lock);
+ if (found != new_node) {
+ g_slice_free1(sizeof(*new_node), new_node);
+ return NULL;
+ }
+
+ atomic_add(&s->lreq.curr_size, new_node->cm.nb_sectors);
+
+ lreq_try_shrink(s);
+
+ qemu_co_mutex_lock(&s->lreq.lru.lock);
+ QTAILQ_INSERT_HEAD(&s->lreq.lru.list, &new_node->cm, entry);
+ qemu_co_mutex_unlock(&s->lreq.lru.lock);
+
+ return new_node;
+}
+
static uint64_t ranges_overlap_size(uint64_t node1, uint32_t size1,
uint64_t node2, uint32_t size2)
{
@@ -552,13 +626,24 @@ enum {
static int32_t pcache_prefetch(PrefCacheAIOCB *acb)
{
+ BDRVPCacheState *s = acb->s;
RbNodeKey key;
- PCNode *node = NULL;
+ PCNode *node;
prefetch_init_key(acb, &key);
- if (pcache_node_find_and_create(acb, &key, &node)) {
+
+ /* add request statistics */
+ lreq_node_add(acb, &key);
+
+ qemu_co_mutex_lock(&s->pcache.tree.lock); /* XXX: use get_next_node */
+ node = pcache_node_search(&s->pcache.tree.root, &key);
+ qemu_co_mutex_unlock(&s->pcache.tree.lock);
+ if (node == NULL) {
return PREFETCH_NEW_NODE;
}
+ if (node->status == NODE_SUCCESS_STATUS) {
+ pcache_lru_node_up(s, node);
+ }
/* Node covers the whole request */
if (node->cm.sector_num <= acb->sector_num &&
@@ -795,6 +880,30 @@ static bool check_allocated_blocks(BlockDriverState *bs, int64_t sector_num,
return true;
}
+static bool check_lreq_sequence(BDRVPCacheState *s, uint64_t sector_num)
+{
+ RbNodeKey key;
+ LRNode *node;
+ uint32_t cache_line_sz = s->cfg.readahead_size;
+
+ if (sector_num <= cache_line_sz) {
+ return false;
+ }
+ /* check whether the preceding cache block was requested */
+ key.num = sector_num - cache_line_sz;
+ key.size = cache_line_sz;
+
+ qemu_co_mutex_lock(&s->lreq.tree.lock);
+ node = lreq_node_search(&s->lreq.tree.root, &key);
+ qemu_co_mutex_unlock(&s->lreq.tree.lock);
+ if (node == NULL) { /* requests aren't sequential,
+ * so readahead is unlikely to be useful.
+ */
+ return false;
+ }
+ return node->cm.sector_num <= key.num;
+}
+
static void pcache_readahead_request(BlockDriverState *bs, PrefCacheAIOCB *acb)
{
BDRVPCacheState *s = acb->s;
@@ -803,6 +912,9 @@ static void pcache_readahead_request(BlockDriverState *bs, PrefCacheAIOCB *acb)
uint64_t total_sectors = bdrv_nb_sectors(bs);
PCNode *node = NULL;
+ if (!check_lreq_sequence(acb->s, acb->sector_num)) {
+ return;
+ }
prefetch_init_key(acb, &key);
key.num = key.num + key.size;
@@ -841,7 +953,13 @@ static BlockAIOCB *pcache_aio_readv(BlockDriverState *bs,
PrefCacheAIOCB *acb = pcache_aio_get(bs, sector_num, qiov, nb_sectors, cb,
opaque, PCACHE_AIO_READ);
int32_t status = pcache_prefetch(acb);
- if (status == PREFETCH_FULL_UP) {
+ if (status == PREFETCH_NEW_NODE) {
+ BlockAIOCB *ret = bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
+ cb, opaque);
+ pcache_readahead_request(bs, acb);
+ qemu_aio_unref(acb); /* XXX: fix superfluous alloc */
+ return ret;
+ } else if (status == PREFETCH_FULL_UP) {
assert(acb->requests.cnt == 0);
complete_aio_request(acb);
} else {
@@ -885,8 +1003,15 @@ static void pcache_state_init(QemuOpts *opts, BDRVPCacheState *s)
qemu_co_mutex_init(&s->pcache.lru.lock);
s->pcache.curr_size = 0;
+ s->lreq.tree.root = RB_ROOT;
+ qemu_co_mutex_init(&s->lreq.tree.lock);
+ QTAILQ_INIT(&s->lreq.lru.list);
+ qemu_co_mutex_init(&s->lreq.lru.lock);
+ s->lreq.curr_size = 0;
+
s->cfg.cache_size = cache_size >> BDRV_SECTOR_BITS;
s->cfg.readahead_size = readahead_size >> BDRV_SECTOR_BITS;
+ s->cfg.lreq_pool_size = PCACHE_DEFAULT_POOL_STAT_SIZE >> BDRV_SECTOR_BITS;
#ifdef PCACHE_DEBUG
QTAILQ_INIT(&s->death_node_list);
@@ -946,6 +1071,16 @@ static void pcache_close(BlockDriverState *bs)
}
DPRINTF("used %d nodes\n", cnt);
+ cnt = 0;
+ if (!QTAILQ_EMPTY(&s->lreq.lru.list)) {
+ QTAILQ_FOREACH_SAFE(node, &s->lreq.lru.list, entry, next) {
+ QTAILQ_REMOVE(&s->lreq.lru.list, node, entry);
+ g_slice_free1(sizeof(*node), node);
+ cnt++;
+ }
+ }
+ DPRINTF("used %d lreq nodes\n", cnt);
+
#ifdef PCACHE_DEBUG
if (!QTAILQ_EMPTY(&s->death_node_list)) {
cnt = 0;
--
2.8.3
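The statistics pool is itself size-bounded: lreq_node_add() accounts each
node's nb_sectors, and lreq_try_shrink() evicts nodes from the LRU tail
until lreq.curr_size fits under cfg.lreq_pool_size. The following
standalone sketch reproduces that bounded-pool behavior with a plain BSD
TAILQ in place of the rbtree and coroutine mutexes; POOL_CAP_SECTORS,
pool_add() and pool_shrink() are illustrative names, not from the patch.

/* Bounded LRU pool sketch: new entries become most-recently-used,
 * and the least-recently-used tail is evicted once the accounted
 * size exceeds the cap. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

#define POOL_CAP_SECTORS 2048      /* stands in for cfg.lreq_pool_size */

struct lr_entry {
    uint64_t sector_num;
    uint32_t nb_sectors;
    TAILQ_ENTRY(lr_entry) link;
};

TAILQ_HEAD(lr_list, lr_entry);
static struct lr_list pool = TAILQ_HEAD_INITIALIZER(pool);
static uint32_t pool_size;         /* accounted sectors, like lreq.curr_size */

/* Evict least-recently-used entries until the pool fits under the cap. */
static void pool_shrink(void)
{
    while (pool_size > POOL_CAP_SECTORS && !TAILQ_EMPTY(&pool)) {
        struct lr_entry *victim = TAILQ_LAST(&pool, lr_list);
        TAILQ_REMOVE(&pool, victim, link);
        pool_size -= victim->nb_sectors;
        free(victim);
    }
}

/* Account a new request window, shrink, then make it most recent,
 * following the add/shrink/insert order of lreq_node_add(). */
static void pool_add(uint64_t sector_num, uint32_t nb_sectors)
{
    struct lr_entry *e = malloc(sizeof(*e));
    e->sector_num = sector_num;
    e->nb_sectors = nb_sectors;
    pool_size += nb_sectors;
    pool_shrink();
    TAILQ_INSERT_HEAD(&pool, e, link);
}

int main(void)
{
    /* 32 windows of 256 sectors each: 8192 sectors offered in total. */
    for (uint64_t s = 0; s < 32 * 256; s += 256) {
        pool_add(s, 256);
    }
    printf("pool size: %u sectors (cap %u)\n", pool_size, POOL_CAP_SECTORS);
    return 0;
}

Shrinking before the head insert mirrors the patch's add/shrink/insert
order, so the pool settles at the cap (here 2048 sectors) no matter how
many one-off random reads come in.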