[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[RFC PATCH v2 3/6] target/riscv: Inline vext_ldst_us and corresponding f
From: |
Max Chou |
Subject: |
[RFC PATCH v2 3/6] target/riscv: Inline vext_ldst_us and corresponding function for performance |
Date: |
Sat, 1 Jun 2024 01:44:50 +0800 |
In the vector unit-stride load/store helper functions. the vext_ldst_us
function corresponding most of the execution time. Inline the functions
can avoid the function call overhead to improve the helper function
performance.
Signed-off-by: Max Chou <max.chou@sifive.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/riscv/vector_helper.c | 30 ++++++++++++++++--------------
1 file changed, 16 insertions(+), 14 deletions(-)
diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c
index 440c33c141b..cb7267c3217 100644
--- a/target/riscv/vector_helper.c
+++ b/target/riscv/vector_helper.c
@@ -149,25 +149,27 @@ static inline void vext_set_elem_mask(void *v0, int index,
typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr,
uint32_t idx, void *vd, uintptr_t retaddr);
-#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
-static void NAME(CPURISCVState *env, abi_ptr addr, \
- uint32_t idx, void *vd, uintptr_t retaddr)\
-{ \
- ETYPE *cur = ((ETYPE *)vd + H(idx)); \
- *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
-} \
+#define GEN_VEXT_LD_ELEM(NAME, ETYPE, H, LDSUF) \
+static inline QEMU_ALWAYS_INLINE \
+void NAME(CPURISCVState *env, abi_ptr addr, \
+ uint32_t idx, void *vd, uintptr_t retaddr) \
+{ \
+ ETYPE *cur = ((ETYPE *)vd + H(idx)); \
+ *cur = cpu_##LDSUF##_data_ra(env, addr, retaddr); \
+} \
GEN_VEXT_LD_ELEM(lde_b, int8_t, H1, ldsb)
GEN_VEXT_LD_ELEM(lde_h, int16_t, H2, ldsw)
GEN_VEXT_LD_ELEM(lde_w, int32_t, H4, ldl)
GEN_VEXT_LD_ELEM(lde_d, int64_t, H8, ldq)
-#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
-static void NAME(CPURISCVState *env, abi_ptr addr, \
- uint32_t idx, void *vd, uintptr_t retaddr)\
-{ \
- ETYPE data = *((ETYPE *)vd + H(idx)); \
- cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
+#define GEN_VEXT_ST_ELEM(NAME, ETYPE, H, STSUF) \
+static inline QEMU_ALWAYS_INLINE \
+void NAME(CPURISCVState *env, abi_ptr addr, \
+ uint32_t idx, void *vd, uintptr_t retaddr) \
+{ \
+ ETYPE data = *((ETYPE *)vd + H(idx)); \
+ cpu_##STSUF##_data_ra(env, addr, data, retaddr); \
}
GEN_VEXT_ST_ELEM(ste_b, int8_t, H1, stb)
@@ -291,7 +293,7 @@ GEN_VEXT_ST_STRIDE(vsse64_v, int64_t, ste_d)
*/
/* unmasked unit-stride load and store operation */
-static void
+static inline QEMU_ALWAYS_INLINE void
vext_ldst_us(void *vd, target_ulong base, CPURISCVState *env, uint32_t desc,
vext_ldst_elem_fn *ldst_elem, uint32_t log2_esz, uint32_t evl,
uintptr_t ra)
--
2.34.1
- [RFC PATCH v2 0/6] Improve the performance of RISC-V vector unit-stride/whole register ld/st instructions, Max Chou, 2024/05/31
- [RFC PATCH v2 1/6] target/riscv: Separate vector segment ld/st instructions, Max Chou, 2024/05/31
- [RFC PATCH v2 2/6] accel/tcg: Avoid unnecessary call overhead from qemu_plugin_vcpu_mem_cb, Max Chou, 2024/05/31
- [RFC PATCH v2 3/6] target/riscv: Inline vext_ldst_us and corresponding function for performance,
Max Chou <=
- [RFC PATCH v2 4/6] target/riscv: Add check_probe_[read|write] helper functions, Max Chou, 2024/05/31
- [RFC PATCH v2 5/6] target/riscv: rvv: Optimize v[l|s]e8.v with limitations, Max Chou, 2024/05/31
- [RFC PATCH v2 6/6] target/riscv: rvv: Optimize vl8re8.v/vs8r.v with limitations, Max Chou, 2024/05/31