[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 04/17] target/arm: Convert UZP, TRN, ZIP to decodetree
From: |
Richard Henderson |
Subject: |
[PATCH 04/17] target/arm: Convert UZP, TRN, ZIP to decodetree |
Date: |
Wed, 17 Jul 2024 16:08:50 +1000 |
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.c | 158 ++++++++++++++-------------------
target/arm/tcg/a64.decode | 9 ++
2 files changed, 77 insertions(+), 90 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 7e3bde93fe..e0314a1253 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -4671,6 +4671,74 @@ static bool trans_TBL_TBX(DisasContext *s, arg_TBL_TBX
*a)
return true;
}
+typedef int simd_permute_idx_fn(int i, int part, int elements);
+
+static bool do_simd_permute(DisasContext *s, arg_qrrr_e *a,
+ simd_permute_idx_fn *fn, int part)
+{
+ MemOp esz = a->esz;
+ int datasize = a->q ? 16 : 8;
+ int elements = datasize >> esz;
+ TCGv_i64 tcg_res[2], tcg_ele;
+
+ if (esz == MO_64 && !a->q) {
+ return false;
+ }
+ if (!fp_access_check(s)) {
+ return true;
+ }
+
+ tcg_res[0] = tcg_temp_new_i64();
+ tcg_res[1] = a->q ? tcg_temp_new_i64() : NULL;
+ tcg_ele = tcg_temp_new_i64();
+
+ for (int i = 0; i < elements; i++) {
+ int o, w, idx;
+
+ idx = fn(i, part, elements);
+ read_vec_element(s, tcg_ele, (idx & elements ? a->rm : a->rn),
+ idx & (elements - 1), esz);
+
+ w = (i << (esz + 3)) / 64;
+ o = (i << (esz + 3)) % 64;
+ if (o == 0) {
+ tcg_gen_mov_i64(tcg_res[w], tcg_ele);
+ } else {
+ tcg_gen_deposit_i64(tcg_res[w], tcg_res[w], tcg_ele, o, 8 << esz);
+ }
+ }
+
+ for (int i = a->q; i >= 0; --i) {
+ write_vec_element(s, tcg_res[i], a->rd, i, MO_64);
+ }
+ clear_vec_high(s, a->q, a->rd);
+ return true;
+}
+
+static int permute_load_uzp(int i, int part, int elements)
+{
+ return 2 * i + part;
+}
+
+TRANS(UZP1, do_simd_permute, a, permute_load_uzp, 0)
+TRANS(UZP2, do_simd_permute, a, permute_load_uzp, 1)
+
+static int permute_load_trn(int i, int part, int elements)
+{
+ return (i & 1) * elements + (i & ~1) + part;
+}
+
+TRANS(TRN1, do_simd_permute, a, permute_load_trn, 0)
+TRANS(TRN2, do_simd_permute, a, permute_load_trn, 1)
+
+static int permute_load_zip(int i, int part, int elements)
+{
+ return (i & 1) * elements + ((part * elements + i) >> 1);
+}
+
+TRANS(ZIP1, do_simd_permute, a, permute_load_zip, 0)
+TRANS(ZIP2, do_simd_permute, a, permute_load_zip, 1)
+
/*
* Cryptographic AES, SHA, SHA512
*/
@@ -8911,95 +8979,6 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t
insn)
}
}
-/* ZIP/UZP/TRN
- * 31 30 29 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0
- * +---+---+-------------+------+---+------+---+------------------+------+
- * | 0 | Q | 0 0 1 1 1 0 | size | 0 | Rm | 0 | opc | 1 0 | Rn | Rd |
- * +---+---+-------------+------+---+------+---+------------------+------+
- */
-static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- /* opc field bits [1:0] indicate ZIP/UZP/TRN;
- * bit 2 indicates 1 vs 2 variant of the insn.
- */
- int opcode = extract32(insn, 12, 2);
- bool part = extract32(insn, 14, 1);
- bool is_q = extract32(insn, 30, 1);
- int esize = 8 << size;
- int i;
- int datasize = is_q ? 128 : 64;
- int elements = datasize / esize;
- TCGv_i64 tcg_res[2], tcg_ele;
-
- if (opcode == 0 || (size == 3 && !is_q)) {
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- tcg_res[0] = tcg_temp_new_i64();
- tcg_res[1] = is_q ? tcg_temp_new_i64() : NULL;
- tcg_ele = tcg_temp_new_i64();
-
- for (i = 0; i < elements; i++) {
- int o, w;
-
- switch (opcode) {
- case 1: /* UZP1/2 */
- {
- int midpoint = elements / 2;
- if (i < midpoint) {
- read_vec_element(s, tcg_ele, rn, 2 * i + part, size);
- } else {
- read_vec_element(s, tcg_ele, rm,
- 2 * (i - midpoint) + part, size);
- }
- break;
- }
- case 2: /* TRN1/2 */
- if (i & 1) {
- read_vec_element(s, tcg_ele, rm, (i & ~1) + part, size);
- } else {
- read_vec_element(s, tcg_ele, rn, (i & ~1) + part, size);
- }
- break;
- case 3: /* ZIP1/2 */
- {
- int base = part * elements / 2;
- if (i & 1) {
- read_vec_element(s, tcg_ele, rm, base + (i >> 1), size);
- } else {
- read_vec_element(s, tcg_ele, rn, base + (i >> 1), size);
- }
- break;
- }
- default:
- g_assert_not_reached();
- }
-
- w = (i * esize) / 64;
- o = (i * esize) % 64;
- if (o == 0) {
- tcg_gen_mov_i64(tcg_res[w], tcg_ele);
- } else {
- tcg_gen_shli_i64(tcg_ele, tcg_ele, o);
- tcg_gen_or_i64(tcg_res[w], tcg_res[w], tcg_ele);
- }
- }
-
- for (i = 0; i <= is_q; ++i) {
- write_vec_element(s, tcg_res[i], rd, i, MO_64);
- }
- clear_vec_high(s, is_q, rd);
-}
-
/*
* do_reduction_op helper
*
@@ -11774,7 +11753,6 @@ static const AArch64DecodeTable data_proc_simd[] = {
/* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
{ 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
{ 0x0f000400, 0x9f800400, disas_simd_shift_imm },
- { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
{ 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
{ 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 45896902d5..5bd5603cd0 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -1145,3 +1145,12 @@ EXT_q 0110 1110 00 0 rm:5 0 imm:4 0 rn:5 rd:5
# Advanced SIMD Table Lookup
TBL_TBX 0 q:1 00 1110 000 rm:5 0 len:2 tbx:1 00 rn:5 rd:5
+
+# Advanced SIMD Permute
+
+UZP1 0.00 1110 .. 0 ..... 0 001 10 ..... ..... @qrrr_e
+UZP2 0.00 1110 .. 0 ..... 0 101 10 ..... ..... @qrrr_e
+TRN1 0.00 1110 .. 0 ..... 0 010 10 ..... ..... @qrrr_e
+TRN2 0.00 1110 .. 0 ..... 0 110 10 ..... ..... @qrrr_e
+ZIP1 0.00 1110 .. 0 ..... 0 011 10 ..... ..... @qrrr_e
+ZIP2 0.00 1110 .. 0 ..... 0 111 10 ..... ..... @qrrr_e
--
2.43.0
- [PATCH 00/17] target/arm: AdvSIMD decodetree conversion, part 4, Richard Henderson, 2024/07/17
- [PATCH 01/17] target/arm: Use tcg_gen_extract2_i64 for EXT, Richard Henderson, 2024/07/17
- [PATCH 02/17] target/arm: Convert EXT to decodetree, Richard Henderson, 2024/07/17
- [PATCH 03/17] target/arm: Convert TBL, TBX to decodetree, Richard Henderson, 2024/07/17
- [PATCH 04/17] target/arm: Convert UZP, TRN, ZIP to decodetree,
Richard Henderson <=
- [PATCH 05/17] target/arm: Simplify do_reduction_op, Richard Henderson, 2024/07/17
- [PATCH 07/17] target/arm: Convert FMAXNMV, FMINNMV, FMAXV, FMINV to decodetree, Richard Henderson, 2024/07/17
- [PATCH 09/17] target/arm: Convert MOVI, FMOV, ORR, BIC (vector immediate) to decodetree, Richard Henderson, 2024/07/17
- [PATCH 06/17] target/arm: Convert ADDV, *ADDLV, *MAXV, *MINV to decodetree, Richard Henderson, 2024/07/17
- [PATCH 08/17] target/arm: Convert FMOVI (scalar, immediate) to decodetree, Richard Henderson, 2024/07/17
- [PATCH 10/17] target/arm: Introduce gen_gvec_sshr, gen_gvec_ushr, Richard Henderson, 2024/07/17
- [PATCH 12/17] target/arm: Convert handle_vec_simd_shri to decodetree, Richard Henderson, 2024/07/17
- [PATCH 13/17] target/arm: Convet handle_vec_simd_shli to decodetree, Richard Henderson, 2024/07/17
- [PATCH 11/17] target/arm: Fix whitespace near gen_srshr64_i64, Richard Henderson, 2024/07/17