qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes p


From: Liu Ping Fan
Subject: [Qemu-devel] [RFC PATCH 1/2] mem: make phys_section and phys_map_nodes prepared for RCU
Date: Mon, 13 May 2013 11:21:03 +0800

From: Liu Ping Fan <address@hidden>

Now, each AddressSpaceDispatch has its own radix-tree, and all of them
lie on phys_section[] and phys_map_nodes[]. When we want lockless
mmio dispatch, we need something like RCU.

Achieve this with PhysPageTable, which contains all of the info for all
radix trees. After all address space listeners update (i.e. excluding the
readers) we switch from PhysPageTable *cur_pgtbl to *next_pgtbl.
(The real RCU style is adopted by the listener; see the next patch.)

Signed-off-by: Liu Ping Fan <address@hidden>
---
 exec.c                |  197 +++++++++++++++++++++++++------------------------
 include/exec/memory.h |    2 +
 memory.c              |    2 +
 3 files changed, 106 insertions(+), 95 deletions(-)

diff --git a/exec.c b/exec.c
index c5f8082..bb4e540 100644
--- a/exec.c
+++ b/exec.c
@@ -80,23 +80,34 @@ int use_icount;
 #if !defined(CONFIG_USER_ONLY)
 
 #define SUBSECTION_IDX(addr)      ((addr) & ~TARGET_PAGE_MASK)
-#define PHYS_SECTION_ID(psection) ((psection) - phys_sections)
+#define PHYS_SECTION_ID(psection, base) ((psection) - base->phys_sections)
 
 typedef struct PhysSection {
     MemoryRegionSection section;
     uint16_t *sub_section;
 } PhysSection;
 
-static PhysSection *phys_sections;
-static unsigned phys_sections_nb, phys_sections_nb_alloc;
-static uint16_t phys_section_unassigned;
-static uint16_t phys_section_notdirty;
-static uint16_t phys_section_rom;
-static uint16_t phys_section_watch;
+typedef PhysPageEntry Node[L2_SIZE];
 
-/* Simple allocator for PhysPageEntry nodes */
-static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
-static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
+typedef struct PhysPageTable PhysPageTable;
+
+struct PhysPageTable {
+    int ref;
+    PhysSection *phys_sections;
+    unsigned phys_sections_nb;
+    unsigned phys_sections_nb_alloc;
+    uint16_t phys_section_unassigned;
+    uint16_t phys_section_notdirty;
+    uint16_t phys_section_rom;
+    uint16_t phys_section_watch;
+
+    Node *phys_map_nodes;
+    unsigned phys_map_nodes_nb;
+    unsigned phys_map_nodes_nb_alloc;
+};
+
+static PhysPageTable *cur_pgtbl;
+static PhysPageTable *next_pgtbl;
 
 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 
@@ -111,13 +122,13 @@ static MemoryRegion io_mem_watch;
 
 static void phys_map_node_reserve(unsigned nodes)
 {
-    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
+    if (next_pgtbl->phys_map_nodes_nb + nodes > 
next_pgtbl->phys_map_nodes_nb_alloc) {
         typedef PhysPageEntry Node[L2_SIZE];
-        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
-        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
-                                      phys_map_nodes_nb + nodes);
-        phys_map_nodes = g_renew(Node, phys_map_nodes,
-                                 phys_map_nodes_nb_alloc);
+        next_pgtbl->phys_map_nodes_nb_alloc = 
MAX(next_pgtbl->phys_map_nodes_nb_alloc * 2, 16);
+        next_pgtbl->phys_map_nodes_nb_alloc = 
MAX(next_pgtbl->phys_map_nodes_nb_alloc,
+                                      next_pgtbl->phys_map_nodes_nb + nodes);
+        next_pgtbl->phys_map_nodes = g_renew(Node, next_pgtbl->phys_map_nodes,
+                                 next_pgtbl->phys_map_nodes_nb_alloc);
     }
 }
 
@@ -126,22 +137,16 @@ static uint16_t phys_map_node_alloc(void)
     unsigned i;
     uint16_t ret;
 
-    ret = phys_map_nodes_nb++;
+    ret = next_pgtbl->phys_map_nodes_nb++;
     assert(ret != PHYS_MAP_NODE_NIL);
-    assert(ret != phys_map_nodes_nb_alloc);
+    assert(ret != next_pgtbl->phys_map_nodes_nb_alloc);
     for (i = 0; i < L2_SIZE; ++i) {
-        phys_map_nodes[ret][i].is_leaf = 0;
-        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
+        next_pgtbl->phys_map_nodes[ret][i].is_leaf = 0;
+        next_pgtbl->phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
     }
     return ret;
 }
 
-static void phys_map_nodes_reset(void)
-{
-    phys_map_nodes_nb = 0;
-}
-
-
 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                 hwaddr *nb, uint16_t leaf,
                                 int level)
@@ -152,15 +157,15 @@ static void phys_page_set_level(PhysPageEntry *lp, hwaddr 
*index,
 
     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
         lp->ptr = phys_map_node_alloc();
-        p = phys_map_nodes[lp->ptr];
+        p = next_pgtbl->phys_map_nodes[lp->ptr];
         if (level == 0) {
             for (i = 0; i < L2_SIZE; i++) {
                 p[i].is_leaf = 1;
-                p[i].ptr = phys_section_unassigned;
+                p[i].ptr = next_pgtbl->phys_section_unassigned;
             }
         }
     } else {
-        p = phys_map_nodes[lp->ptr];
+        p = next_pgtbl->phys_map_nodes[lp->ptr];
     }
     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
 
@@ -192,11 +197,13 @@ static PhysSection 
*phys_section_find(AddressSpaceDispatch *d,
 {
     PhysPageEntry lp = d->phys_map;
     PhysPageEntry *p;
+    PhysSection *phys_sections = cur_pgtbl->phys_sections;
+    Node *phys_map_nodes = cur_pgtbl->phys_map_nodes;
     int i;
 
     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
         if (lp.ptr == PHYS_MAP_NODE_NIL) {
-            return &phys_sections[phys_section_unassigned];
+            return &phys_sections[cur_pgtbl->phys_section_unassigned];
         }
         p = phys_map_nodes[lp.ptr];
         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
@@ -209,6 +216,7 @@ static MemoryRegionSection 
*address_space_lookup_region(AddressSpace *as,
 {
     PhysSection *psection;
     uint16_t idx;
+    PhysSection *phys_sections = cur_pgtbl->phys_sections;
 
     psection = phys_section_find(as->dispatch, addr >> TARGET_PAGE_BITS);
     if (psection->sub_section) {
@@ -246,7 +254,7 @@ MemoryRegionSection *address_space_translate(AddressSpace 
*as, hwaddr addr,
                 | (addr & iotlb.addr_mask));
         len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
         if (!iotlb.perm[is_write]) {
-            section = &phys_sections[phys_section_unassigned].section;
+            section = 
&cur_pgtbl->phys_sections[cur_pgtbl->phys_section_unassigned].section;
             break;
         }
 
@@ -690,12 +698,12 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
             + xlat;
         if (!section->readonly) {
-            iotlb |= phys_section_notdirty;
+            iotlb |= cur_pgtbl->phys_section_notdirty;
         } else {
-            iotlb |= phys_section_rom;
+            iotlb |= cur_pgtbl->phys_section_rom;
         }
     } else {
-        iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section));
+        iotlb = PHYS_SECTION_ID(container_of(section, PhysSection, section), 
cur_pgtbl);
         iotlb += xlat;
     }
 
@@ -705,7 +713,7 @@ hwaddr memory_region_section_get_iotlb(CPUArchState *env,
         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
             /* Avoid trapping reads of pages with a write breakpoint. */
             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
-                iotlb = phys_section_watch + paddr;
+                iotlb = cur_pgtbl->phys_section_watch + paddr;
                 *address |= TLB_MMIO;
                 break;
             }
@@ -721,59 +729,40 @@ static int subsection_register(PhysSection *psection, 
uint32_t start,
                                uint32_t end, uint16_t section);
 static void subsections_init(PhysSection *psection);
 
-static void destroy_page_desc(uint16_t section_index)
+/* Call after all listeners have committed.
+  * We do not walk over the tree, just simply drop it.
+  */
+static void destroy_pagetable(PhysPageTable *pgtbl)
 {
-    g_free(phys_sections[section_index].sub_section);
-}
-
-static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
-{
-    unsigned i;
-    PhysPageEntry *p;
+    int i;
 
-    if (lp->ptr == PHYS_MAP_NODE_NIL) {
-        return;
-    }
+    g_free(pgtbl->phys_map_nodes);
 
-    p = phys_map_nodes[lp->ptr];
-    for (i = 0; i < L2_SIZE; ++i) {
-        if (!p[i].is_leaf) {
-            destroy_l2_mapping(&p[i], level - 1);
+    for (i = 0; i < pgtbl->phys_sections_nb_alloc; i++) {
+        if (pgtbl->phys_sections[i].sub_section) {
+            g_free(pgtbl->phys_sections[i].sub_section);
         } else {
-            destroy_page_desc(p[i].ptr);
+            memory_region_unref(pgtbl->phys_sections[i].section.mr);
         }
     }
-    lp->is_leaf = 0;
-    lp->ptr = PHYS_MAP_NODE_NIL;
-}
+    g_free(pgtbl->phys_sections);
 
-static void destroy_all_mappings(AddressSpaceDispatch *d)
-{
-    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
-    phys_map_nodes_reset();
+    g_free(pgtbl);
 }
 
-static uint16_t phys_section_add(MemoryRegionSection *section)
+static uint16_t phys_section_add(MemoryRegionSection *section, PhysPageTable 
*pgtbl)
 {
-    assert(phys_sections_nb < TARGET_PAGE_SIZE);
+    assert(pgtbl->phys_sections_nb < TARGET_PAGE_SIZE);
 
-    if (phys_sections_nb == phys_sections_nb_alloc) {
-        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
-        phys_sections = g_renew(PhysSection, phys_sections,
-                                phys_sections_nb_alloc);
+    if (pgtbl->phys_sections_nb == pgtbl->phys_sections_nb_alloc) {
+        pgtbl->phys_sections_nb_alloc = MAX(pgtbl->phys_sections_nb_alloc * 2, 
16);
+        pgtbl->phys_sections = g_renew(PhysSection, pgtbl->phys_sections,
+                                pgtbl->phys_sections_nb_alloc);
     }
-    phys_sections[phys_sections_nb].section = *section;
-    phys_sections[phys_sections_nb].sub_section = NULL;
+    pgtbl->phys_sections[pgtbl->phys_sections_nb].section = *section;
+    pgtbl->phys_sections[pgtbl->phys_sections_nb].sub_section = NULL;
     memory_region_ref(section->mr);
-    return phys_sections_nb++;
-}
-
-static void phys_sections_clear(void)
-{
-    while (phys_sections_nb > 0) {
-        PhysSection *phys_section = &phys_sections[--phys_sections_nb];
-        memory_region_unref(phys_section->section.mr);
-    }
+    return pgtbl->phys_sections_nb++;
 }
 
 static void register_subsection(AddressSpaceDispatch *d,
@@ -793,18 +782,18 @@ static void register_subsection(AddressSpaceDispatch *d,
            psection->section.mr == &io_mem_unassigned);
 
     if (!psection->sub_section) {
-        new_section = phys_section_add(&subsection);
-        psection = &phys_sections[new_section];
+        new_section = phys_section_add(&subsection, next_pgtbl);
+        psection = &next_pgtbl->phys_sections[new_section];
         subsections_init(psection);
         phys_page_set(d, base >> TARGET_PAGE_BITS, 1, new_section);
     } else {
-        new_section = PHYS_SECTION_ID(psection);
+        new_section = PHYS_SECTION_ID(psection, next_pgtbl);
     }
 
-    new_subsection = phys_section_add(section);
+    new_subsection = phys_section_add(section, next_pgtbl);
 
     /* phys_section_add invalidates psection, reload it  */
-    psection = &phys_sections[new_section];
+    psection = &next_pgtbl->phys_sections[new_section];
     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
     end = start + section->size - 1;
     subsection_register(psection, start, end, new_subsection);
@@ -816,7 +805,7 @@ static void register_multipage(AddressSpaceDispatch *d, 
MemoryRegionSection *sec
     hwaddr start_addr = section->offset_within_address_space;
     ram_addr_t size = section->size;
     hwaddr addr;
-    uint16_t section_index = phys_section_add(section);
+    uint16_t section_index = phys_section_add(section, next_pgtbl);
 
     assert(size);
 
@@ -1653,7 +1642,7 @@ static void subsections_init(PhysSection *psection)
 {
     psection->sub_section = g_malloc0(sizeof(uint16_t) * TARGET_PAGE_SIZE);
     subsection_register(psection, 0, TARGET_PAGE_SIZE-1,
-                        phys_section_unassigned);
+                        next_pgtbl->phys_section_unassigned);
 }
 
 static uint16_t dummy_section(MemoryRegion *mr)
@@ -1665,12 +1654,12 @@ static uint16_t dummy_section(MemoryRegion *mr)
         .size = UINT64_MAX,
     };
 
-    return phys_section_add(&section);
+    return phys_section_add(&section, next_pgtbl);
 }
 
 MemoryRegion *iotlb_to_region(hwaddr index)
 {
-    return phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
+    return cur_pgtbl->phys_sections[index & ~TARGET_PAGE_MASK].section.mr;
 }
 
 static void io_mem_init(void)
@@ -1685,21 +1674,40 @@ static void io_mem_init(void)
                           "watch", UINT64_MAX);
 }
 
+void global_begin(void)
+{
+    next_pgtbl = g_new0(PhysPageTable, 1);
+    next_pgtbl->ref = 1;
+    next_pgtbl->phys_section_unassigned = dummy_section(&io_mem_unassigned);
+    next_pgtbl->phys_section_notdirty = dummy_section(&io_mem_notdirty);
+    next_pgtbl->phys_section_rom = dummy_section(&io_mem_rom);
+    next_pgtbl->phys_section_watch = dummy_section(&io_mem_watch);
+}
+
+/* other listeners finished */
+void global_commit(void)
+{
+    PhysPageTable *t = cur_pgtbl;
+
+    cur_pgtbl = next_pgtbl;
+    /* FIXME: currently, we rely on each address space listener to synchronize
+      * against its reader. So when we come here, no readers will touch the old
+      * phys_map_node. After RCU is in place, this should be changed to call_rcu().
+      */
+    if (__sync_sub_and_fetch(&t->ref, 1) == 0) {
+        destroy_pagetable(t);
+    }
+}
+
 static void mem_begin(MemoryListener *listener)
 {
     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, 
listener);
 
-    destroy_all_mappings(d);
     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
 }
 
 static void core_begin(MemoryListener *listener)
 {
-    phys_sections_clear();
-    phys_section_unassigned = dummy_section(&io_mem_unassigned);
-    phys_section_notdirty = dummy_section(&io_mem_notdirty);
-    phys_section_rom = dummy_section(&io_mem_rom);
-    phys_section_watch = dummy_section(&io_mem_watch);
 }
 
 static void tcg_commit(MemoryListener *listener)
@@ -1779,7 +1787,6 @@ void address_space_destroy_dispatch(AddressSpace *as)
     AddressSpaceDispatch *d = as->dispatch;
 
     memory_listener_unregister(&d->listener);
-    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
     g_free(d);
     as->dispatch = NULL;
 }
@@ -2386,7 +2393,7 @@ void stl_phys_notdirty(hwaddr addr, uint32_t val)
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         if (memory_region_is_ram(section->mr)) {
-            section = &phys_sections[phys_section_rom].section;
+            section = 
&cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
         }
         io_mem_write(section->mr, addr, val, 4);
     } else {
@@ -2422,7 +2429,7 @@ void stq_phys_notdirty(hwaddr addr, uint64_t val)
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         if (memory_region_is_ram(section->mr)) {
-            section = &phys_sections[phys_section_rom].section;
+            section = 
&cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
         }
 #ifdef TARGET_WORDS_BIGENDIAN
         io_mem_write(section->mr, addr, val >> 32, 4);
@@ -2455,7 +2462,7 @@ static inline void stl_phys_internal(hwaddr addr, 
uint32_t val,
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         if (memory_region_is_ram(section->mr)) {
-            section = &phys_sections[phys_section_rom].section;
+            section = 
&cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
         }
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
@@ -2526,7 +2533,7 @@ static inline void stw_phys_internal(hwaddr addr, 
uint32_t val,
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         if (memory_region_is_ram(section->mr)) {
-            section = &phys_sections[phys_section_rom].section;
+            section = 
&cur_pgtbl->phys_sections[cur_pgtbl->phys_section_rom].section;
         }
 #if defined(TARGET_WORDS_BIGENDIAN)
         if (endian == DEVICE_LITTLE_ENDIAN) {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index b97ace7..cc654fa 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -992,6 +992,8 @@ void *address_space_map(AddressSpace *as, hwaddr addr,
 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
                          int is_write, hwaddr access_len);
 
+void global_begin(void);
+void global_commit(void);
 
 #endif
 
diff --git a/memory.c b/memory.c
index 1a86607..da06dfd 100644
--- a/memory.c
+++ b/memory.c
@@ -805,6 +805,7 @@ void memory_region_transaction_commit(void)
     --memory_region_transaction_depth;
     if (!memory_region_transaction_depth && memory_region_update_pending) {
         memory_region_update_pending = false;
+        global_begin();
         MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
 
         QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
@@ -812,6 +813,7 @@ void memory_region_transaction_commit(void)
         }
 
         MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
+        global_commit();
     }
 }
 
-- 
1.7.4.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]