[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 8/8] tcg/loongarch64: Reorg goto_tb implementation
From: Richard Henderson
Subject: [PATCH 8/8] tcg/loongarch64: Reorg goto_tb implementation
Date: Mon, 5 Dec 2022 22:40:51 -0600
The old implementation replaces two insns, swapping between
b <dest>
nop
and
pcaddu18i tmp, <dest>
jirl zero, tmp, <dest> & 0xffff
There is a race condition in which a thread could be stopped at
the jirl, i.e. with the top of the address loaded, and when
restarted we have re-linked to a different TB, so that the top
half no longer matches the bottom half.
Note that while we never directly re-link to a different TB, we
can link, unlink, and link again all while the stopped thread
remains stopped.
The new implementation replaces only one insn, swapping between
b <dest>
and
nop
falling through to a general-case indirect branch.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/loongarch64/tcg-target.h | 7 +---
tcg/loongarch64/tcg-target.c.inc | 67 ++++++++++++--------------------
2 files changed, 26 insertions(+), 48 deletions(-)
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
index 624fbe87ff..81548fbb09 100644
--- a/tcg/loongarch64/tcg-target.h
+++ b/tcg/loongarch64/tcg-target.h
@@ -42,11 +42,8 @@
#define TCG_TARGET_INSN_UNIT_SIZE 4
#define TCG_TARGET_NB_REGS 32
-/*
- * PCADDU18I + JIRL sequence can give 20 + 16 + 2 = 38 bits
- * signed offset, which is +/- 128 GiB.
- */
-#define MAX_CODE_GEN_BUFFER_SIZE (128 * GiB)
+
+#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
typedef enum {
TCG_REG_ZERO,
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 47465b8c20..f8964699eb 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1150,37 +1150,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
#endif
}
-/* LoongArch uses `andi zero, zero, 0` as NOP. */
-#define NOP OPC_ANDI
-static void tcg_out_nop(TCGContext *s)
-{
- tcg_out32(s, NOP);
-}
-
-void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
- uintptr_t jmp_rx, uintptr_t jmp_rw)
-{
- tcg_insn_unit i1, i2;
- ptrdiff_t upper, lower;
- uintptr_t addr = tb->jmp_target_addr[n];
- ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2;
-
- if (offset == sextreg(offset, 0, 26)) {
- i1 = encode_sd10k16_insn(OPC_B, offset);
- i2 = NOP;
- } else {
- tcg_debug_assert(offset == sextreg(offset, 0, 36));
- lower = (int16_t)offset;
- upper = (offset - lower) >> 16;
-
- i1 = encode_dsj20_insn(OPC_PCADDU18I, TCG_REG_TMP0, upper);
- i2 = encode_djsk16_insn(OPC_JIRL, TCG_REG_ZERO, TCG_REG_TMP0, lower);
- }
- uint64_t pair = ((uint64_t)i2 << 32) | i1;
- qatomic_set((uint64_t *)jmp_rw, pair);
- flush_idcache_range(jmp_rx, jmp_rw, 8);
-}
-
/*
* Entry-points
*/
@@ -1200,23 +1169,35 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
static void tcg_out_goto_tb(TCGContext *s, int which)
{
- /*
- * Ensure that patch area is 8-byte aligned so that an
- * atomic write can be used to patch the target address.
- */
- if ((uintptr_t)s->code_ptr & 7) {
- tcg_out_nop(s);
- }
+ /* Direct branch will be patched by tb_target_set_jmp_target. */
set_jmp_insn_offset(s, which);
- /*
- * actual branch destination will be patched by
- * tb_target_set_jmp_target later
- */
- tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0);
+ tcg_out_opc_b(s, 0);
+
+ /* When branch is out of range, fall through to indirect. */
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+ get_jmp_target_addr(s, which));
tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
set_jmp_reset_offset(s, which);
}
+void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
+ uintptr_t jmp_rx, uintptr_t jmp_rw)
+{
+ uintptr_t addr = tb->jmp_target_addr[n];
+ ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2;
+ tcg_insn_unit insn;
+
+ /* Either directly branch, or fall through to indirect branch. */
+ if (offset == sextreg(offset, 0, 26)) {
+ insn = encode_sd10k16_insn(OPC_B, offset);
+ } else {
+ /* LoongArch uses `andi zero, zero, 0` as NOP. */
+ insn = OPC_ANDI;
+ }
+ qatomic_set((tcg_insn_unit *)jmp_rw, insn);
+ flush_idcache_range(jmp_rx, jmp_rw, 4);
+}
+
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS])
--
2.34.1
- [PATCH 3/8] tcg/loongarch64: Update tcg-insn-defs.c.inc, (continued)
[PATCH 5/8] tcg/loongarch64: Improve setcond expansion, Richard Henderson, 2022/12/05
[PATCH 6/8] tcg/loongarch64: Implement movcond, Richard Henderson, 2022/12/05
[PATCH 7/8] tcg/loongarch64: Use tcg_pcrel_diff in tcg_out_ldst, Richard Henderson, 2022/12/05
[PATCH 8/8] tcg/loongarch64: Reorg goto_tb implementation, Richard Henderson <=
Re: [PATCH 0/8] tcg/loongarch64: Reorg goto_tb and cleanups, Philippe Mathieu-Daudé, 2022/12/15