[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH RFC v2] target/arm: Implement SVE2 TBL, TBX
From: |
Stephen Long |
Subject: |
[PATCH RFC v2] target/arm: Implement SVE2 TBL, TBX |
Date: |
Tue, 28 Apr 2020 07:43:52 -0700 |
Signed-off-by: Stephen Long <address@hidden>
---
target/arm/helper-sve.h | 10 ++++++
target/arm/internals.h | 12 +++++++
target/arm/sve.decode | 5 +++
target/arm/sve_helper.c | 71 ++++++++++++++++++++++++++++++++++----
target/arm/translate-sve.c | 20 +++++++++++
5 files changed, 112 insertions(+), 6 deletions(-)
Pulled out the common functionality for SVE TBL and SVE2 TBL, TBX. I
haven't reimplemented AdvSIMD TBL, TBX yet, but I would like to know if
I'm on the right track.
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 51ad60e5c3..ed8b9223ee 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2690,3 +2690,13 @@ DEF_HELPER_FLAGS_5(sve2_sqrdcmlah_zzzz_d,
TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(fmmla_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr,
i32)
DEF_HELPER_FLAGS_6(fmmla_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr,
i32)
+
+DEF_HELPER_FLAGS_5(sve2_tbl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_tbl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sve2_tbx_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2_tbx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/internals.h b/target/arm/internals.h
index bae4f36426..286ef3c4c6 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -1381,4 +1381,16 @@ static inline uint64_t useronly_maybe_clean_ptr(uint32_t
desc, uint64_t ptr)
return ptr;
}
+#define DECLARE_DO_TB(TYPE) /* Declares the prototype of do_tb_<TYPE> for one element type. */ \
+void do_tb_##TYPE(TYPE *vd, TYPE **tables, intptr_t ntables, \
+ intptr_t table_sz, TYPE *indices, \
+ intptr_t nindices, bool is_tbl);
+
+DECLARE_DO_TB(uint8_t) /* do_tb_uint8_t */
+DECLARE_DO_TB(uint16_t) /* do_tb_uint16_t */
+DECLARE_DO_TB(uint32_t) /* do_tb_uint32_t */
+DECLARE_DO_TB(uint64_t) /* do_tb_uint64_t */
+
+#undef DECLARE_DO_TB
+
#endif
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index de3768c24a..624c12faf3 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1419,3 +1419,8 @@ STNT1_zprz 1110010 .. 00 ..... 001 ... ..... ..... \
# SVE2 32-bit scatter non-temporal store (vector plus scalar)
STNT1_zprz 1110010 .. 10 ..... 001 ... ..... ..... \
@rprr_scatter_store xs=0 esz=2 scale=0
+
+### SVE2 Table Lookup (three sources)
+
+TBL_zzz 00000101 .. 1 ..... 00101 0 ..... ..... @rd_rn_rm
+TBX_zzz 00000101 .. 1 ..... 00101 1 ..... ..... @rd_rn_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index cd5c6f7fb0..2b8de6adb8 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -2944,20 +2944,37 @@ void HELPER(sve_rev_d)(void *vd, void *vn, uint32_t
desc)
}
}
+#define DO_TB(TYPE, H) /* Defines do_tb_<TYPE>: for each of nindices elements, look indices[i] up in the tables and store it to vd[i]. */ \
+void do_tb_##TYPE(TYPE *vd, TYPE **tables, intptr_t ntables, \
+ intptr_t table_sz, TYPE *indices, \
+ intptr_t nindices, bool is_tbl) \
+{ \
+ for (intptr_t i = 0; i < nindices; ++i) { \
+ TYPE index = indices[H(i)]; \
+ if (index < table_sz * ntables) { /* tables[] acts as one array of ntables * table_sz elements */ \
+ vd[H(i)] = tables[index / table_sz][H(index % table_sz)]; /* quotient picks the table, remainder the slot in it */ \
+ } else if (is_tbl) { /* out of range: is_tbl zeroes the element, otherwise vd[i] is left unchanged */ \
+ vd[H(i)] = 0; \
+ } \
+ } \
+}
+
+DO_TB(uint8_t, H1)
+DO_TB(uint16_t, H2)
+DO_TB(uint32_t, H4)
+DO_TB(uint64_t, )
+
#define DO_TBL(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
{ \
- intptr_t i, opr_sz = simd_oprsz(desc); \
+ intptr_t opr_sz = simd_oprsz(desc); \
uintptr_t elem = opr_sz / sizeof(TYPE); \
- TYPE *d = vd, *n = vn, *m = vm; \
+ TYPE *n = vn; \
ARMVectorReg tmp; \
if (unlikely(vd == vn)) { \
n = memcpy(&tmp, vn, opr_sz); \
} \
- for (i = 0; i < elem; i++) { \
- TYPE j = m[H(i)]; \
- d[H(i)] = j < elem ? n[H(j)] : 0; \
- } \
+ do_tb_##TYPE(vd, &n, 1, elem, vm, elem, true); \
}
DO_TBL(sve_tbl_b, uint8_t, H1)
@@ -2967,6 +2984,48 @@ DO_TBL(sve_tbl_d, uint64_t, )
#undef TBL
+#define DO_SVE2_TBL(NAME, TYPE, H) /* Defines helper NAME: table lookup over the two tables vn1 and vn2, indexed by vm. */ \
+void HELPER(NAME)(void *vd, void *vn1, void *vm, void *vn2, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ intptr_t elem = opr_sz / sizeof(TYPE); /* number of TYPE elements in opr_sz bytes */ \
+ TYPE *n1 = vn1, *n2 = vn2; \
+ ARMVectorReg tmp1, tmp2; \
+ if (unlikely(vd == vn1)) { /* destination aliases a table: read that table from a copy */ \
+ n1 = memcpy(&tmp1, vn1, opr_sz); \
+ } else if (unlikely(vd == vn2)) { \
+ n2 = memcpy(&tmp2, vn2, opr_sz); \
+ } \
+ TYPE *tables[] = {n1, n2}; \
+ do_tb_##TYPE(vd, tables, 2, elem, vm, elem, true); /* 2 tables of elem entries each; is_tbl = true */ \
+}
+
+DO_SVE2_TBL(sve2_tbl_b, uint8_t, H1)
+DO_SVE2_TBL(sve2_tbl_h, uint16_t, H2)
+DO_SVE2_TBL(sve2_tbl_s, uint32_t, H4)
+DO_SVE2_TBL(sve2_tbl_d, uint64_t, )
+
+#define DO_SVE2_TBX(NAME, TYPE, H) /* Defines helper NAME: single-table lookup of vn indexed by vm, calling do_tb_<TYPE> with is_tbl = false. */ \
+void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+{ \
+ intptr_t opr_sz = simd_oprsz(desc); \
+ uintptr_t elem = opr_sz / sizeof(TYPE); /* number of TYPE elements in opr_sz bytes */ \
+ TYPE *n = vn; \
+ ARMVectorReg tmp; \
+ if (unlikely(vd == vn)) { /* destination aliases the table: read the table from a copy */ \
+ n = memcpy(&tmp, vn, opr_sz); \
+ } \
+ do_tb_##TYPE(vd, &n, 1, elem, vm, elem, false); /* 1 table of elem entries; is_tbl = false */ \
+}
+
+DO_SVE2_TBX(sve2_tbx_b, uint8_t, H1)
+DO_SVE2_TBX(sve2_tbx_h, uint16_t, H2)
+DO_SVE2_TBX(sve2_tbx_s, uint32_t, H4)
+DO_SVE2_TBX(sve2_tbx_d, uint64_t, )
+
+#undef DO_SVE2_TBX
+#undef DO_SVE2_TBL
+
#define DO_UNPK(NAME, TYPED, TYPES, HD, HS) \
void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \
{ \
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 86c3d0ed11..11b78f49b4 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7956,3 +7956,23 @@ static bool trans_SQRDCMLAH_zzzz(DisasContext *s,
arg_CMLA_zzzz *a)
};
return do_sve2_zzzz_fn(s, a->rd, a->rn, a->rm, a->ra, fns[a->esz], a->rot);
}
+
+static bool trans_TBL_zzz(DisasContext *s, arg_rrr_esz *a) /* Translator for the TBL_zzz decode pattern (two table registers). */
+{
+ static gen_helper_gvec_4 * const fns[] = { /* one helper per element size, indexed by a->esz */
+ gen_helper_sve2_tbl_b, gen_helper_sve2_tbl_h,
+ gen_helper_sve2_tbl_s, gen_helper_sve2_tbl_d,
+ };
+ int rn1 = a->rn;
+ int rn2 = (a->rn + 1) % 32; /* second table is the next register number, wrapping modulo 32 */
+ return do_sve2_zzzz_fn(s, a->rd, rn1, a->rm, rn2, fns[a->esz], 0);
+}
+
+static bool trans_TBX_zzz(DisasContext *s, arg_rrr_esz *a) /* Translator for the TBX_zzz decode pattern. */
+{
+ static gen_helper_gvec_3 * const fns[] = { /* one helper per element size, indexed by a->esz */
+ gen_helper_sve2_tbx_b, gen_helper_sve2_tbx_h,
+ gen_helper_sve2_tbx_s, gen_helper_sve2_tbx_d,
+ };
+ return do_sve2_zzz_ool(s, a, fns[a->esz]);
+}
--
2.17.1
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [PATCH RFC v2] target/arm: Implement SVE2 TBL, TBX,
Stephen Long <=