From: Pavel Butsykin
Subject: [Qemu-block] [PATCH RFC v2 09/22] block/pcache: separation AIOCB on requests
Date: Mon, 29 Aug 2016 20:10:08 +0300

For the case when the cache partially covers a request, part of the
request is filled from the cache and the other part is read from disk.
Also add reference counting for nodes as a way to support
multithreading. There is still no full synchronization in
multithreaded mode.
Signed-off-by: Pavel Butsykin <address@hidden>
---
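
A note on the partial-coverage path: the split of one request against
one cached extent can be shown with a small standalone sketch. This is
an illustration only, not pcache code: Range and split_request() are
made-up names, the sketch assumes the node overlaps the request (which
the tree lookup guarantees in pcache), and real data moves through a
QEMUIOVector rather than printf.

/* Illustration only: split one request against one cached extent.
 * Ranges are half-open sector intervals [num, num + size). */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef struct Range {
    uint64_t num;   /* first sector */
    uint32_t size;  /* number of sectors */
} Range;

static void split_request(Range req, Range node)
{
    uint64_t req_end   = req.num + req.size;
    uint64_t node_end  = node.num + node.size;
    uint64_t hit_start = node.num > req.num ? node.num : req.num;
    uint64_t hit_end   = node_end < req_end ? node_end : req_end;

    if (node.num > req.num) {
        /* head of the request lies before the node: read from disk */
        printf("disk : %" PRIu64 "..%" PRIu64 "\n", req.num, node.num);
    }
    if (hit_start < hit_end) {
        /* overlapping part: filled from the cached node's data */
        printf("cache: %" PRIu64 "..%" PRIu64 "\n", hit_start, hit_end);
    }
    if (req_end > node_end) {
        /* tail of the request lies after the node: read from disk */
        printf("disk : %" PRIu64 "..%" PRIu64 "\n", node_end, req_end);
    }
}

int main(void)
{
    /* request sectors 8..24 against a cached extent 0..16:
     * sectors 8..16 come from the cache, 16..24 from disk */
    split_request((Range){ .num = 8, .size = 16 },
                  (Range){ .num = 0, .size = 16 });
    return 0;
}
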
block/pcache.c | 169 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 155 insertions(+), 14 deletions(-)
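
A note on node lifetime: a node may only be freed once it has been
marked for removal and the last holder has dropped its reference. Below
is a minimal sketch of that rule, using C11 <stdatomic.h> as a stand-in
for QEMU's atomic_* helpers; Node, node_ref(), node_unref() and
node_drop() are illustrative names, and the real patch additionally
keeps WAIT and GHOST states for in-flight and debugging cases.

/* Illustration only: a node is freed exactly once, by whichever side
 * (cache eviction or last request) lets go of it last. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

enum { NODE_ALIVE, NODE_REMOVED };

typedef struct Node {
    atomic_uint ref;     /* holders: the cache itself plus requests */
    atomic_uint status;
    uint8_t *data;
} Node;

static Node *node_ref(Node *n)
{
    atomic_fetch_add(&n->ref, 1);
    return n;
}

static void node_unref(Node *n)
{
    /* fetch_sub returns the previous value, so 1 means we just
     * dropped the last reference */
    if (atomic_fetch_sub(&n->ref, 1) == 1 &&
        atomic_load(&n->status) == NODE_REMOVED) {
        free(n->data);
        free(n);
        puts("node freed on last unref");
    }
}

static void node_drop(Node *n)
{
    /* mark removed first; the free happens when the refcount drains */
    atomic_store(&n->status, NODE_REMOVED);
    node_unref(n);
}

int main(void)
{
    Node *n = calloc(1, sizeof(*n));
    n->data = malloc(512);
    atomic_store(&n->status, NODE_ALIVE);
    atomic_store(&n->ref, 1);   /* the cache's own reference */

    node_ref(n);    /* a request picks up the node            */
    node_drop(n);   /* cache evicts it while the request runs */
    node_unref(n);  /* request finishes: last unref frees it  */
    return 0;
}
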
diff --git a/block/pcache.c b/block/pcache.c
index 28bd056..6114289 100644
--- a/block/pcache.c
+++ b/block/pcache.c
@@ -58,7 +58,10 @@ typedef struct BlockNode {
typedef struct PCNode {
BlockNode cm;
+ uint32_t status;
+ uint32_t ref;
uint8_t *data;
+ CoMutex lock;
} PCNode;
typedef struct ReqStor {
@@ -95,9 +98,23 @@ typedef struct PrefCacheAIOCB {
uint64_t sector_num;
uint32_t nb_sectors;
int aio_type;
+ struct {
+ QTAILQ_HEAD(req_head, PrefCachePartReq) list;
+ CoMutex lock;
+ } requests;
int ret;
} PrefCacheAIOCB;
+typedef struct PrefCachePartReq {
+ uint64_t sector_num;
+ uint32_t nb_sectors;
+
+ QEMUIOVector qiov;
+ PCNode *node;
+ PrefCacheAIOCB *acb;
+ QTAILQ_ENTRY(PrefCachePartReq) entry;
+} PrefCachePartReq;
+
static const AIOCBInfo pcache_aiocb_info = {
.aiocb_size = sizeof(PrefCacheAIOCB),
};
@@ -126,8 +143,39 @@ static QemuOptsList runtime_opts = {
#define MB_BITS 20
#define PCACHE_DEFAULT_CACHE_SIZE (4 << MB_BITS)
+enum {
+ NODE_SUCCESS_STATUS = 0,
+ NODE_WAIT_STATUS = 1,
+ NODE_REMOVE_STATUS = 2,
+ NODE_GHOST_STATUS = 3 /* only for debugging */
+};
+
#define PCNODE(_n) ((PCNode *)(_n))
+static inline void pcache_node_unref(PCNode *node)
+{
+ assert(node->status == NODE_SUCCESS_STATUS ||
+ node->status == NODE_REMOVE_STATUS);
+
+ if (atomic_fetch_dec(&node->ref) == 0) {
+ assert(node->status == NODE_REMOVE_STATUS);
+
+ node->status = NODE_GHOST_STATUS;
+ g_free(node->data);
+ g_slice_free1(sizeof(*node), node);
+ }
+}
+
+static inline PCNode *pcache_node_ref(PCNode *node)
+{
+ assert(node->status == NODE_SUCCESS_STATUS ||
+ node->status == NODE_WAIT_STATUS);
+ assert(atomic_read(&node->ref) == 0);/* XXX: only for sequential requests */
+ atomic_inc(&node->ref);
+
+ return node;
+}
+
static int pcache_key_cmp(const RbNodeKey *key1, const RbNodeKey *key2)
{
assert(key1 != NULL);
@@ -184,13 +232,7 @@ static void *node_insert(struct RbRoot *root, BlockNode *node)
static inline PCNode *pcache_node_insert(struct RbRoot *root, PCNode *node)
{
- return node_insert(root, &node->cm);
-}
-
-static inline void pcache_node_free(PCNode *node)
-{
- g_free(node->data);
- g_slice_free1(sizeof(*node), node);
+ return pcache_node_ref(node_insert(root, &node->cm));
}
static inline void *pcache_node_alloc(RbNodeKey* key)
@@ -199,6 +241,9 @@ static inline void *pcache_node_alloc(RbNodeKey* key)
node->cm.sector_num = key->num;
node->cm.nb_sectors = key->size;
+ node->ref = 0;
+ node->status = NODE_WAIT_STATUS;
+ qemu_co_mutex_init(&node->lock);
node->data = g_malloc(node->cm.nb_sectors << BDRV_SECTOR_BITS);
return node;
@@ -206,6 +251,12 @@ static inline void *pcache_node_alloc(RbNodeKey* key)
static void pcache_node_drop(BDRVPCacheState *s, PCNode *node)
{
+ uint32_t prev_status = atomic_xchg(&node->status, NODE_REMOVE_STATUS);
+ if (prev_status == NODE_REMOVE_STATUS) {
+ return;
+ }
+ assert(prev_status != NODE_GHOST_STATUS);
+
atomic_sub(&s->pcache.curr_size, node->cm.nb_sectors);
qemu_co_mutex_lock(&s->pcache.lru.lock);
@@ -216,7 +267,7 @@ static void pcache_node_drop(BDRVPCacheState *s, PCNode *node)
rb_erase(&node->cm.rb_node, &s->pcache.tree.root);
qemu_co_mutex_unlock(&s->pcache.tree.lock);
- pcache_node_free(node);
+ pcache_node_unref(node);
}
static void pcache_try_shrink(BDRVPCacheState *s)
@@ -234,6 +285,30 @@ static void pcache_try_shrink(BDRVPCacheState *s)
}
}
+static PrefCachePartReq *pcache_req_get(PrefCacheAIOCB *acb, PCNode *node)
+{
+ PrefCachePartReq *req = g_slice_alloc(sizeof(*req));
+
+ req->nb_sectors = node->cm.nb_sectors;
+ req->sector_num = node->cm.sector_num;
+ req->node = node;
+ req->acb = acb;
+
+ assert(acb->sector_num <= node->cm.sector_num + node->cm.nb_sectors);
+
+ qemu_iovec_init(&req->qiov, 1);
+ qemu_iovec_add(&req->qiov, node->data,
+ node->cm.nb_sectors << BDRV_SECTOR_BITS);
+ return req;
+}
+
+static inline void push_node_request(PrefCacheAIOCB *acb, PCNode *node)
+{
+ PrefCachePartReq *req = pcache_req_get(acb, node);
+
+ QTAILQ_INSERT_HEAD(&acb->requests.list, req, entry);
+}
+
static inline void pcache_lru_node_up(BDRVPCacheState *s, PCNode *node)
{
qemu_co_mutex_lock(&s->pcache.lru.lock);
@@ -253,16 +328,17 @@ static bool pcache_node_find_and_create(PrefCacheAIOCB *acb, RbNodeKey *key,
found = pcache_node_insert(&s->pcache.tree.root, new_node);
qemu_co_mutex_unlock(&s->pcache.tree.lock);
if (found != new_node) {
- pcache_node_free(new_node);
- pcache_lru_node_up(s, found);
+ g_free(new_node->data);
+ g_slice_free1(sizeof(*new_node), new_node);
+ if (found->status == NODE_SUCCESS_STATUS) {
+ pcache_lru_node_up(s, found);
+ }
*out_node = found;
return false;
}
atomic_add(&s->pcache.curr_size, new_node->cm.nb_sectors);
- qemu_co_mutex_lock(&s->pcache.lru.lock);
- QTAILQ_INSERT_HEAD(&s->pcache.lru.list, &new_node->cm, entry);
- qemu_co_mutex_unlock(&s->pcache.lru.lock);
+ push_node_request(acb, new_node);
pcache_try_shrink(s);
@@ -291,6 +367,7 @@ static void pcache_pickup_parts_of_cache(PrefCacheAIOCB *acb, PCNode *node,
up_size = lc_key.size;
if (!pcache_node_find_and_create(acb, &lc_key, &new_node)) {
+ pcache_node_unref(node);
node = new_node;
continue;
}
@@ -300,6 +377,8 @@ static void pcache_pickup_parts_of_cache(PrefCacheAIOCB *acb, PCNode *node,
/* XXX: node read */
up_size = MIN(node->cm.sector_num + node->cm.nb_sectors - num, size);
+ pcache_node_unref(node);
+
size -= up_size;
num += up_size;
if (size != 0) {
@@ -336,6 +415,8 @@ static int32_t pcache_prefetch(PrefCacheAIOCB *acb)
node->cm.sector_num + node->cm.nb_sectors >= acb->sector_num +
acb->nb_sectors)
{
+ /* XXX: node read */
+ pcache_node_unref(node);
return PREFETCH_FULL_UP;
}
pcache_pickup_parts_of_cache(acb, node, key.num, key.size);
@@ -343,10 +424,56 @@ static int32_t pcache_prefetch(PrefCacheAIOCB *acb)
return PREFETCH_PART_UP;
}
+static void pcache_node_submit(PrefCachePartReq *req)
+{
+ PCNode *node = req->node;
+ BDRVPCacheState *s = req->acb->s;
+
+ assert(node != NULL);
+ assert(atomic_read(&node->ref) != 0);
+ assert(node->data != NULL);
+
+ qemu_co_mutex_lock(&node->lock);
+ if (node->status == NODE_WAIT_STATUS) {
+ qemu_co_mutex_lock(&s->pcache.lru.lock);
+ QTAILQ_INSERT_HEAD(&s->pcache.lru.list, &node->cm, entry);
+ qemu_co_mutex_unlock(&s->pcache.lru.lock);
+
+ node->status = NODE_SUCCESS_STATUS;
+ }
+ qemu_co_mutex_unlock(&node->lock);
+}
+
+static void pcache_merge_requests(PrefCacheAIOCB *acb)
+{
+ PrefCachePartReq *req, *next;
+
+ qemu_co_mutex_lock(&acb->requests.lock);
+ QTAILQ_FOREACH_SAFE(req, &acb->requests.list, entry, next) {
+ QTAILQ_REMOVE(&acb->requests.list, req, entry);
+
+ assert(req != NULL);
+ assert(req->node->status == NODE_WAIT_STATUS);
+
+ pcache_node_submit(req);
+
+ /* XXX: pcache read */
+
+ pcache_node_unref(req->node);
+
+ g_slice_free1(sizeof(*req), req);
+ }
+ qemu_co_mutex_unlock(&acb->requests.lock);
+}
+
static void pcache_aio_cb(void *opaque, int ret)
{
PrefCacheAIOCB *acb = opaque;
+ if (acb->aio_type & QEMU_AIO_READ) {
+ pcache_merge_requests(acb);
+ }
+
acb->common.cb(acb->common.opaque, ret);
qemu_aio_unref(acb);
@@ -366,6 +493,9 @@ static PrefCacheAIOCB *pcache_aio_get(BlockDriverState *bs, int64_t sector_num,
acb->aio_type = type;
acb->ret = 0;
+ QTAILQ_INIT(&acb->requests.list);
+ qemu_co_mutex_init(&acb->requests.lock);
+
return acb;
}
@@ -445,6 +575,17 @@ fail:
return ret;
}
+static void pcache_node_check_and_free(BDRVPCacheState *s, PCNode *node)
+{
+ assert(node->status == NODE_SUCCESS_STATUS);
+ assert(node->ref == 0);
+
+ node->status = NODE_REMOVE_STATUS;
+ rb_erase(&node->cm.rb_node, &s->pcache.tree.root);
+ g_free(node->data);
+ g_slice_free1(sizeof(*node), node);
+}
+
static void pcache_close(BlockDriverState *bs)
{
uint32_t cnt = 0;
@@ -452,7 +593,7 @@ static void pcache_close(BlockDriverState *bs)
BlockNode *node, *next;
QTAILQ_FOREACH_SAFE(node, &s->pcache.lru.list, entry, next) {
QTAILQ_REMOVE(&s->pcache.lru.list, node, entry);
- pcache_node_free(PCNODE(node));
+ pcache_node_check_and_free(s, PCNODE(node));
cnt++;
}
DPRINTF("used %d nodes\n", cnt);
--
2.8.3