[Qemu-devel] [RFC v2 3/7] Add new TLB

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC v2 3/7] Add new TLB_EXCL flag

From:	Alvise Rigo
Subject:	[Qemu-devel] [RFC v2 3/7] Add new TLB_EXCL flag
Date:	Mon, 15 Jun 2015 13:51:24 +0200

Add a new flag for the TLB entries to force all the accesses made to a
page to follow the slow-path.

In the case we remove a TLB entry marked as EXCL, we unset the
corresponding exclusive bit in the bitmap.

Mark the accessed page as dirty to invalidate any pending operation of
LL/SC only if a vCPU writes to the protected address.

Suggested-by: Jani Kokkonen <address@hidden>
Suggested-by: Claudio Fontana <address@hidden>
Signed-off-by: Alvise Rigo <address@hidden>
---
 cputlb.c                |  18 ++++-
 include/exec/cpu-all.h  |   2 +
 include/exec/cpu-defs.h |   4 ++
 softmmu_template.h      | 187 ++++++++++++++++++++++++++++++------------------
 4 files changed, 142 insertions(+), 69 deletions(-)

diff --git a/cputlb.c b/cputlb.c
index a506086..630c11c 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -299,6 +299,16 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong 
vaddr,
     env->tlb_v_table[mmu_idx][vidx] = *te;
     env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
 
+    if (te->addr_write & TLB_EXCL) {
+        /* We are removing an exclusive entry, if the corresponding exclusive
+         * bit is set, unset it. */
+        hwaddr hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) +
+                                          (te->addr_write & TARGET_PAGE_MASK);
+        if (cpu_physical_memory_excl_is_dirty(hw_addr)) {
+            cpu_physical_memory_set_excl_dirty(hw_addr);
+        }
+    }
+
     /* refill the tlb */
     env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
     env->iotlb[mmu_idx][index].attrs = attrs;
@@ -324,7 +334,13 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong 
vaddr,
                                                    + xlat)) {
             te->addr_write = address | TLB_NOTDIRTY;
         } else {
-            te->addr_write = address;
+            if (!(address & TLB_MMIO) &&
+                !cpu_physical_memory_excl_is_dirty(section->mr->ram_addr
+                                                   + xlat)) {
+                te->addr_write = address | TLB_EXCL;
+            } else {
+                te->addr_write = address;
+            }
         }
     } else {
         te->addr_write = -1;
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index ac06c67..632f6ce 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -311,6 +311,8 @@ extern RAMList ram_list;
 #define TLB_NOTDIRTY    (1 << 4)
 /* Set if TLB entry is an IO callback.  */
 #define TLB_MMIO        (1 << 5)
+/* Set if TLB entry refers a page that requires exclusive access.  */
+#define TLB_EXCL        (1 << 6)
 
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
 void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index d5aecaf..c73a75f 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -165,5 +165,9 @@ typedef struct CPUIOTLBEntry {
 #define CPU_COMMON                                                      \
     /* soft mmu support */                                              \
     CPU_COMMON_TLB                                                      \
+                                                                        \
+    /* Used for atomic instruction translation. */                      \
+    bool ll_sc_context;                                                 \
+    hwaddr excl_protected_hwaddr;                                       \
 
 #endif
diff --git a/softmmu_template.h b/softmmu_template.h
index 39f571b..f1782f6 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -141,6 +141,21 @@
     vidx >= 0;                                                                \
 })
 
+#define lookup_cpus_ll_addr(addr)                                             \
+({                                                                            \
+    CPUState *cpu;                                                            \
+    bool hit = false;                                                         \
+                                                                              \
+    CPU_FOREACH(cpu) {                                                        \
+        if (cpu != current_cpu && env->excl_protected_hwaddr == addr) {       \
+            hit = true;                                                       \
+            break;                                                            \
+        }                                                                     \
+    }                                                                         \
+                                                                              \
+    hit;                                                                      \
+})
+
 #ifndef SOFTMMU_CODE_ACCESS
 static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               CPUIOTLBEntry *iotlbentry,
@@ -409,43 +424,61 @@ void helper_le_st_name(CPUArchState *env, target_ulong 
addr, DATA_TYPE val,
         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
 
-    /* Handle an IO access.  */
+    /* Handle an IO access or exclusive access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUIOTLBEntry *iotlbentry;
-        if ((addr & (DATA_SIZE - 1)) != 0) {
-            goto do_unaligned_access;
-        }
-        iotlbentry = &env->iotlb[mmu_idx][index];
-
-        /* ??? Note that the io helpers always read data in the target
-           byte ordering.  We should push the LE/BE request down into io.  */
-        val = TGT_LE(val);
-        glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
-        return;
-    }
-
-    /* Handle slow unaligned access (it spans two pages or IO).  */
-    if (DATA_SIZE > 1
-        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
-                     >= TARGET_PAGE_SIZE)) {
-        int i;
-    do_unaligned_access:
-        if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
-            cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
-                                 mmu_idx, retaddr);
+        CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
+            /* The slow-path has been forced since we are writing to
+             * exclusive-protected memory. */
+            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+
+            bool set_to_dirty;
+
+            /* Two cases of invalidation: the current vCPU is writing to 
another
+             * vCPU's exclusive address or the vCPU that issued the LoadLink is
+             * writing to it, but not through a StoreCond. */
+            set_to_dirty = lookup_cpus_ll_addr(hw_addr);
+            set_to_dirty |= env->ll_sc_context &&
+                           (env->excl_protected_hwaddr == hw_addr);
+
+            if (set_to_dirty) {
+                cpu_physical_memory_set_excl_dirty(hw_addr);
+            } /* the vCPU is legitimately writing to the protected address */
+        } else {
+            if ((addr & (DATA_SIZE - 1)) != 0) {
+                goto do_unaligned_access;
+            }
+
+            /* ??? Note that the io helpers always read data in the target
+               byte ordering.  We should push the LE/BE request down into io. 
*/
+            val = TGT_LE(val);
+            glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
+            return;
         }
-        /* XXX: not efficient, but simple */
-        /* Note: relies on the fact that tlb_fill() does not remove the
-         * previous page from the TLB cache.  */
-        for (i = DATA_SIZE - 1; i >= 0; i--) {
-            /* Little-endian extract.  */
-            uint8_t val8 = val >> (i * 8);
-            /* Note the adjustment at the beginning of the function.
-               Undo that for the recursion.  */
-            glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
-                                            oi, retaddr + GETPC_ADJ);
+    } else {
+        /* Handle slow unaligned access (it spans two pages or IO).  */
+        if (DATA_SIZE > 1
+            && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                         >= TARGET_PAGE_SIZE)) {
+            int i;
+        do_unaligned_access:
+            if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
+                cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
+                                     mmu_idx, retaddr);
+            }
+            /* XXX: not efficient, but simple */
+            /* Note: relies on the fact that tlb_fill() does not remove the
+             * previous page from the TLB cache.  */
+            for (i = DATA_SIZE - 1; i >= 0; i--) {
+                /* Little-endian extract.  */
+                uint8_t val8 = val >> (i * 8);
+                /* Note the adjustment at the beginning of the function.
+                   Undo that for the recursion.  */
+                glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+                                                oi, retaddr + GETPC_ADJ);
+            }
+            return;
         }
-        return;
     }
 
     /* Handle aligned access or unaligned access in the same page.  */
@@ -489,43 +522,61 @@ void helper_be_st_name(CPUArchState *env, target_ulong 
addr, DATA_TYPE val,
         tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
     }
 
-    /* Handle an IO access.  */
+    /* Handle an IO access or exclusive access.  */
     if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
-        CPUIOTLBEntry *iotlbentry;
-        if ((addr & (DATA_SIZE - 1)) != 0) {
-            goto do_unaligned_access;
-        }
-        iotlbentry = &env->iotlb[mmu_idx][index];
-
-        /* ??? Note that the io helpers always read data in the target
-           byte ordering.  We should push the LE/BE request down into io.  */
-        val = TGT_BE(val);
-        glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
-        return;
-    }
-
-    /* Handle slow unaligned access (it spans two pages or IO).  */
-    if (DATA_SIZE > 1
-        && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
-                     >= TARGET_PAGE_SIZE)) {
-        int i;
-    do_unaligned_access:
-        if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
-            cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
-                                 mmu_idx, retaddr);
+        CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+        if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
+            /* The slow-path has been forced since we are writing to
+             * exclusive-protected memory. */
+            hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+
+            bool set_to_dirty;
+
+            /* Two cases of invalidation: the current vCPU is writing to 
another
+             * vCPU's exclusive address or the vCPU that issued the LoadLink is
+             * writing to it, but not through a StoreCond. */
+            set_to_dirty = lookup_cpus_ll_addr(hw_addr);
+            set_to_dirty |= env->ll_sc_context &&
+                           (env->excl_protected_hwaddr == hw_addr);
+
+            if (set_to_dirty) {
+                cpu_physical_memory_set_excl_dirty(hw_addr);
+            } /* the vCPU is legitimately writing to the protected address */
+        } else {
+            if ((addr & (DATA_SIZE - 1)) != 0) {
+                goto do_unaligned_access;
+            }
+
+            /* ??? Note that the io helpers always read data in the target
+               byte ordering.  We should push the LE/BE request down into io. 
*/
+            val = TGT_BE(val);
+            glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
+            return;
         }
-        /* XXX: not efficient, but simple */
-        /* Note: relies on the fact that tlb_fill() does not remove the
-         * previous page from the TLB cache.  */
-        for (i = DATA_SIZE - 1; i >= 0; i--) {
-            /* Big-endian extract.  */
-            uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
-            /* Note the adjustment at the beginning of the function.
-               Undo that for the recursion.  */
-            glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
-                                            oi, retaddr + GETPC_ADJ);
+    } else {
+        /* Handle slow unaligned access (it spans two pages or IO).  */
+        if (DATA_SIZE > 1
+            && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+                         >= TARGET_PAGE_SIZE)) {
+            int i;
+        do_unaligned_access:
+            if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
+                cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
+                                     mmu_idx, retaddr);
+            }
+            /* XXX: not efficient, but simple */
+            /* Note: relies on the fact that tlb_fill() does not remove the
+             * previous page from the TLB cache.  */
+            for (i = DATA_SIZE - 1; i >= 0; i--) {
+                /* Big-endian extract.  */
+                uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
+                /* Note the adjustment at the beginning of the function.
+                   Undo that for the recursion.  */
+                glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+                                                oi, retaddr + GETPC_ADJ);
+            }
+            return;
         }
-        return;
     }
 
     /* Handle aligned access or unaligned access in the same page.  */
-- 
2.4.3

[Prev in Thread]

Current Thread

[Next in Thread]

[Qemu-devel] [RFC v2 0/7] Slow-path for atomic instruction translation, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 1/7] bitmap: Add bitmap_one_extend operation, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 3/7] Add new TLB_EXCL flag, Alvise Rigo <=
- [Qemu-devel] [RFC v2 2/7] exec: Add new exclusive bitmap to ram_list, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 4/7] softmmu: Add helpers for a new slow-path, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 5/7] tcg-op: create new TCG qemu_ldlink and qemu_stcond instructions, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 6/7] target-arm: translate: implement qemu_ldlink and qemu_stcond ops, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 7/7] target-i386: translate: implement qemu_ldlink and qemu_stcond ops, Alvise Rigo, 2015/06/15

Prev by Date: [Qemu-devel] [RFC v2 1/7] bitmap: Add bitmap_one_extend operation
Next by Date: [Qemu-devel] [RFC v2 2/7] exec: Add new exclusive bitmap to ram_list
Previous by thread: [Qemu-devel] [RFC v2 1/7] bitmap: Add bitmap_one_extend operation
Next by thread: [Qemu-devel] [RFC v2 2/7] exec: Add new exclusive bitmap to ram_list
Index(es):
- Date
- Thread