[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC v2 3/7] Add new TLB_EXCL flag
From: Alvise Rigo
Subject: [Qemu-devel] [RFC v2 3/7] Add new TLB_EXCL flag
Date: Mon, 15 Jun 2015 13:51:24 +0200
Add a new flag for the TLB entries to force all the accesses made to a
page to follow the slow-path.
When a TLB entry marked as EXCL is removed, we unset the corresponding
exclusive bit in the bitmap.
The accessed page is marked as dirty, invalidating any pending LL/SC
operation, only if a vCPU writes to the protected address.
Suggested-by: Jani Kokkonen <address@hidden>
Suggested-by: Claudio Fontana <address@hidden>
Signed-off-by: Alvise Rigo <address@hidden>
---
cputlb.c | 18 ++++-
include/exec/cpu-all.h | 2 +
include/exec/cpu-defs.h | 4 ++
softmmu_template.h | 187 ++++++++++++++++++++++++++++++------------------
4 files changed, 142 insertions(+), 69 deletions(-)
diff --git a/cputlb.c b/cputlb.c
index a506086..630c11c 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -299,6 +299,16 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
env->tlb_v_table[mmu_idx][vidx] = *te;
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
+ if (te->addr_write & TLB_EXCL) {
+ /* Evicting an exclusive entry: a page is exclusive while its bit is
+ * not dirty, so mark it dirty to drop the protection. */
+ hwaddr hw_addr = (env->iotlb[mmu_idx][index].addr & TARGET_PAGE_MASK) +
+ (te->addr_write & TARGET_PAGE_MASK);
+ if (!cpu_physical_memory_excl_is_dirty(hw_addr)) {
+ cpu_physical_memory_set_excl_dirty(hw_addr);
+ }
+ }
+
/* refill the tlb */
env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
env->iotlb[mmu_idx][index].attrs = attrs;
@@ -324,7 +334,13 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+ xlat)) {
te->addr_write = address | TLB_NOTDIRTY;
} else {
- te->addr_write = address;
+ if (!(address & TLB_MMIO) &&
+ !cpu_physical_memory_excl_is_dirty(section->mr->ram_addr
+ + xlat)) {
+ te->addr_write = address | TLB_EXCL;
+ } else {
+ te->addr_write = address;
+ }
}
} else {
te->addr_write = -1;
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index ac06c67..632f6ce 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -311,6 +311,8 @@ extern RAMList ram_list;
#define TLB_NOTDIRTY (1 << 4)
/* Set if TLB entry is an IO callback. */
#define TLB_MMIO (1 << 5)
+/* Set if TLB entry refers to a page that requires exclusive access. */
+#define TLB_EXCL (1 << 6)
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index d5aecaf..c73a75f 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -165,5 +165,9 @@ typedef struct CPUIOTLBEntry {
#define CPU_COMMON \
/* soft mmu support */ \
CPU_COMMON_TLB \
+ \
+ /* Used for atomic instruction translation. */ \
+ bool ll_sc_context; \
+ hwaddr excl_protected_hwaddr; \
#endif
diff --git a/softmmu_template.h b/softmmu_template.h
index 39f571b..f1782f6 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -141,6 +141,21 @@
vidx >= 0; \
})
+#define lookup_cpus_ll_addr(addr) \
+({ \
+ CPUState *cpu; \
+ bool hit = false; \
+ /* True if a vCPU other than current_cpu protects (addr). */ \
+ CPU_FOREACH(cpu) { \
+ CPUArchState *cpu_env = cpu->env_ptr; /* the iterated vCPU's state */ \
+ if (cpu != current_cpu && cpu_env->excl_protected_hwaddr == (addr)) { \
+ hit = true; \
+ break; \
+ } \
+ } \
+ hit; \
+})
+
#ifndef SOFTMMU_CODE_ACCESS
static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
CPUIOTLBEntry *iotlbentry,
@@ -409,43 +424,61 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
}
- /* Handle an IO access. */
+ /* Handle an IO access or exclusive access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUIOTLBEntry *iotlbentry;
- if ((addr & (DATA_SIZE - 1)) != 0) {
- goto do_unaligned_access;
- }
- iotlbentry = &env->iotlb[mmu_idx][index];
-
- /* ??? Note that the io helpers always read data in the target
- byte ordering. We should push the LE/BE request down into io. */
- val = TGT_LE(val);
- glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
- return;
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (DATA_SIZE > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
- >= TARGET_PAGE_SIZE)) {
- int i;
- do_unaligned_access:
- if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
- cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
- mmu_idx, retaddr);
+ CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+ if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
+ /* The slow-path has been forced since we are writing to
+ * exclusive-protected memory. */
+ hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+
+ bool set_to_dirty;
+
+ /* Two cases of invalidation: the current vCPU is writing to another
+ * vCPU's exclusive address or the vCPU that issued the LoadLink is
+ * writing to it, but not through a StoreCond. */
+ set_to_dirty = lookup_cpus_ll_addr(hw_addr);
+ set_to_dirty |= env->ll_sc_context &&
+ (env->excl_protected_hwaddr == hw_addr);
+
+ if (set_to_dirty) {
+ cpu_physical_memory_set_excl_dirty(hw_addr);
+ } /* the vCPU is legitimately writing to the protected address */
+ } else {
+ if ((addr & (DATA_SIZE - 1)) != 0) {
+ goto do_unaligned_access;
+ }
+
+ /* ??? Note that the io helpers always read data in the target
+ byte ordering. We should push the LE/BE request down into io. */
+ val = TGT_LE(val);
+ glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
+ return;
}
- /* XXX: not efficient, but simple */
- /* Note: relies on the fact that tlb_fill() does not remove the
- * previous page from the TLB cache. */
- for (i = DATA_SIZE - 1; i >= 0; i--) {
- /* Little-endian extract. */
- uint8_t val8 = val >> (i * 8);
- /* Note the adjustment at the beginning of the function.
- Undo that for the recursion. */
- glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
- oi, retaddr + GETPC_ADJ);
+ } else {
+ /* Handle slow unaligned access (it spans two pages or IO). */
+ if (DATA_SIZE > 1
+ && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+ >= TARGET_PAGE_SIZE)) {
+ int i;
+ do_unaligned_access:
+ if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
+ cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
+ mmu_idx, retaddr);
+ }
+ /* XXX: not efficient, but simple */
+ /* Note: relies on the fact that tlb_fill() does not remove the
+ * previous page from the TLB cache. */
+ for (i = DATA_SIZE - 1; i >= 0; i--) {
+ /* Little-endian extract. */
+ uint8_t val8 = val >> (i * 8);
+ /* Note the adjustment at the beginning of the function.
+ Undo that for the recursion. */
+ glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+ oi, retaddr + GETPC_ADJ);
+ }
+ return;
}
- return;
}
/* Handle aligned access or unaligned access in the same page. */
@@ -489,43 +522,61 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
}
- /* Handle an IO access. */
+ /* Handle an IO access or exclusive access. */
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
- CPUIOTLBEntry *iotlbentry;
- if ((addr & (DATA_SIZE - 1)) != 0) {
- goto do_unaligned_access;
- }
- iotlbentry = &env->iotlb[mmu_idx][index];
-
- /* ??? Note that the io helpers always read data in the target
- byte ordering. We should push the LE/BE request down into io. */
- val = TGT_BE(val);
- glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
- return;
- }
-
- /* Handle slow unaligned access (it spans two pages or IO). */
- if (DATA_SIZE > 1
- && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
- >= TARGET_PAGE_SIZE)) {
- int i;
- do_unaligned_access:
- if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
- cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
- mmu_idx, retaddr);
+ CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
+ if ((tlb_addr & ~TARGET_PAGE_MASK) == TLB_EXCL) {
+ /* The slow-path has been forced since we are writing to
+ * exclusive-protected memory. */
+ hwaddr hw_addr = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
+
+ bool set_to_dirty;
+
+ /* Two cases of invalidation: the current vCPU is writing to another
+ * vCPU's exclusive address or the vCPU that issued the LoadLink is
+ * writing to it, but not through a StoreCond. */
+ set_to_dirty = lookup_cpus_ll_addr(hw_addr);
+ set_to_dirty |= env->ll_sc_context &&
+ (env->excl_protected_hwaddr == hw_addr);
+
+ if (set_to_dirty) {
+ cpu_physical_memory_set_excl_dirty(hw_addr);
+ } /* the vCPU is legitimately writing to the protected address */
+ } else {
+ if ((addr & (DATA_SIZE - 1)) != 0) {
+ goto do_unaligned_access;
+ }
+
+ /* ??? Note that the io helpers always read data in the target
+ byte ordering. We should push the LE/BE request down into io. */
+ val = TGT_BE(val);
+ glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr);
+ return;
}
- /* XXX: not efficient, but simple */
- /* Note: relies on the fact that tlb_fill() does not remove the
- * previous page from the TLB cache. */
- for (i = DATA_SIZE - 1; i >= 0; i--) {
- /* Big-endian extract. */
- uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
- /* Note the adjustment at the beginning of the function.
- Undo that for the recursion. */
- glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
- oi, retaddr + GETPC_ADJ);
+ } else {
+ /* Handle slow unaligned access (it spans two pages or IO). */
+ if (DATA_SIZE > 1
+ && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
+ >= TARGET_PAGE_SIZE)) {
+ int i;
+ do_unaligned_access:
+ if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) {
+ cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
+ mmu_idx, retaddr);
+ }
+ /* XXX: not efficient, but simple */
+ /* Note: relies on the fact that tlb_fill() does not remove the
+ * previous page from the TLB cache. */
+ for (i = DATA_SIZE - 1; i >= 0; i--) {
+ /* Big-endian extract. */
+ uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8));
+ /* Note the adjustment at the beginning of the function.
+ Undo that for the recursion. */
+ glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
+ oi, retaddr + GETPC_ADJ);
+ }
+ return;
}
- return;
}
/* Handle aligned access or unaligned access in the same page. */
--
2.4.3
- [Qemu-devel] [RFC v2 0/7] Slow-path for atomic instruction translation, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 1/7] bitmap: Add bitmap_one_extend operation, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 3/7] Add new TLB_EXCL flag,
Alvise Rigo <=
- [Qemu-devel] [RFC v2 2/7] exec: Add new exclusive bitmap to ram_list, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 4/7] softmmu: Add helpers for a new slow-path, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 5/7] tcg-op: create new TCG qemu_ldlink and qemu_stcond instructions, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 6/7] target-arm: translate: implement qemu_ldlink and qemu_stcond ops, Alvise Rigo, 2015/06/15
- [Qemu-devel] [RFC v2 7/7] target-i386: translate: implement qemu_ldlink and qemu_stcond ops, Alvise Rigo, 2015/06/15