[PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector

qemu-riscv

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector

From:	Huang Tao
Subject:	[PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector
Date:	Fri, 12 Apr 2024 15:37:20 +0800

In this patch, we add single-width integer reduction instructions to show
the way we implement XTheadVector reduction instructions.
XTheadVector single-width integer reduction instructions differ from RVV1.0
in the following points:
1. Different mask register layout. For the mask bit of element i, XTheadVector
   locates it in bit[mlen * i], while RVV1.0 locates it in bit[i].
2. Different tail element processing policy. XTheadVector clears the tail
   elements, while RVV1.0 uses vta to select the policy: either keeping the
   value or overwriting it with 1s.
3. Different check policy. XTheadVector does not have fractional LMUL, so we
   can use a simpler check function.

Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com>
---
 target/riscv/helper.h                         | 33 ++++++++
 .../riscv/insn_trans/trans_xtheadvector.c.inc | 27 +++++--
 target/riscv/xtheadvector_helper.c            | 76 +++++++++++++++++++
 3 files changed, 128 insertions(+), 8 deletions(-)

diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index c666a5a020..84d2921945 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -2236,3 +2236,36 @@ DEF_HELPER_5(th_vfncvt_f_x_v_h, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(th_vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(th_vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
 DEF_HELPER_5(th_vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(th_vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmaxu_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmaxu_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmaxu_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmaxu_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmax_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredminu_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredminu_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredminu_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredminu_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmin_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredand_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredand_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredand_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredand_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredor_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredor_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredor_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredor_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn_trans/trans_xtheadvector.c.inc b/target/riscv/insn_trans/trans_xtheadvector.c.inc
index d2734c007a..1fd66353ed 100644
--- a/target/riscv/insn_trans/trans_xtheadvector.c.inc
+++ b/target/riscv/insn_trans/trans_xtheadvector.c.inc
@@ -2374,20 +2374,31 @@ GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_xu_v)
 GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_x_v)
 GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_f_v)
 
+/*
+ * Vector Reduction Operations
+ */
+
+/* Vector Single-Width Integer Reduction Instructions: only rs2 is checked */
+static bool reduction_check_th(DisasContext *s, arg_rmrr *a)
+{
+    return vext_check_isa_ill(s) && th_check_reg(s, a->rs2, false);
+}
+
+GEN_OPIVV_TRANS_TH(th_vredsum_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredmaxu_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredmax_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredminu_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredmin_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredand_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredor_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredxor_vs, reduction_check_th)
+
 #define TH_TRANS_STUB(NAME)                                \
 static bool trans_##NAME(DisasContext *s, arg_##NAME *a)   \
 {                                                          \
     return require_xtheadvector(s);                        \
 }
 
-TH_TRANS_STUB(th_vredsum_vs)
-TH_TRANS_STUB(th_vredand_vs)
-TH_TRANS_STUB(th_vredor_vs)
-TH_TRANS_STUB(th_vredxor_vs)
-TH_TRANS_STUB(th_vredminu_vs)
-TH_TRANS_STUB(th_vredmin_vs)
-TH_TRANS_STUB(th_vredmaxu_vs)
-TH_TRANS_STUB(th_vredmax_vs)
 TH_TRANS_STUB(th_vwredsumu_vs)
 TH_TRANS_STUB(th_vwredsum_vs)
 TH_TRANS_STUB(th_vfredsum_vs)
diff --git a/target/riscv/xtheadvector_helper.c b/target/riscv/xtheadvector_helper.c
index 3a7512ecd8..d041a81150 100644
--- a/target/riscv/xtheadvector_helper.c
+++ b/target/riscv/xtheadvector_helper.c
@@ -3323,3 +3323,79 @@ THCALL(TH_OPFVV1, th_vfncvt_f_f_v_h, NOP_UU_H, H2, H4, vfncvtffv16)
 THCALL(TH_OPFVV1, th_vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
 GEN_TH_V_ENV(th_vfncvt_f_f_v_h, 2, 2, clearh_th)
 GEN_TH_V_ENV(th_vfncvt_f_f_v_w, 4, 4, clearl_th)
+
+/*
+ * Vector Reduction Operations
+ */
+
+/* Vector Single-Width Integer Reduction Instructions */
+#define GEN_TH_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN)  \
+void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
+        void *vs2, CPURISCVState *env, uint32_t desc)     \
+{ /* reduce vs2 into vd[0], seeded by vs1[0] */           \
+    uint32_t mlen = th_mlen(desc);                        \
+    uint32_t vm = th_vm(desc);                            \
+    uint32_t vl = env->vl;                                \
+    uint32_t i;                                           \
+    uint32_t tot = env_archcpu(env)->cfg.vlenb;           \
+    TD s1 =  *((TD *)vs1 + HD(0)); /* accumulator */      \
+    /* fold active vs2[i] into s1, vstart <= i < vl */    \
+    for (i = env->vstart; i < vl; i++) {                  \
+        TS2 s2 = *((TS2 *)vs2 + HS2(i));                  \
+        if (!vm && !th_elem_mask(v0, mlen, i)) {          \
+            continue; /* element i masked off */          \
+        }                                                 \
+        s1 = OP(s1, (TD)s2);                              \
+    }                                                     \
+    *((TD *)vd + HD(0)) = s1; /* result -> vd[0] */       \
+    env->vstart = 0;                                      \
+    CLEAR_FN(vd, 1, sizeof(TD), tot); /* clear tail */    \
+}
+
+/* vd[0] = sum(vs1[0], unmasked vs2[vstart..vl-1]) */
+GEN_TH_RED(th_vredsum_vs_b, int8_t, int8_t, H1, H1, TH_ADD, clearb_th)
+GEN_TH_RED(th_vredsum_vs_h, int16_t, int16_t, H2, H2, TH_ADD, clearh_th)
+GEN_TH_RED(th_vredsum_vs_w, int32_t, int32_t, H4, H4, TH_ADD, clearl_th)
+GEN_TH_RED(th_vredsum_vs_d, int64_t, int64_t, H8, H8, TH_ADD, clearq_th)
+
+/* vd[0] = maxu(vs1[0], unmasked vs2[vstart..vl-1]) */
+GEN_TH_RED(th_vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, TH_MAX, clearb_th)
+GEN_TH_RED(th_vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, TH_MAX, clearh_th)
+GEN_TH_RED(th_vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, TH_MAX, clearl_th)
+GEN_TH_RED(th_vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, TH_MAX, clearq_th)
+
+/* vd[0] = max(vs1[0], unmasked vs2[vstart..vl-1]) */
+GEN_TH_RED(th_vredmax_vs_b, int8_t, int8_t, H1, H1, TH_MAX, clearb_th)
+GEN_TH_RED(th_vredmax_vs_h, int16_t, int16_t, H2, H2, TH_MAX, clearh_th)
+GEN_TH_RED(th_vredmax_vs_w, int32_t, int32_t, H4, H4, TH_MAX, clearl_th)
+GEN_TH_RED(th_vredmax_vs_d, int64_t, int64_t, H8, H8, TH_MAX, clearq_th)
+
+/* vd[0] = minu(vs1[0], unmasked vs2[vstart..vl-1]) */
+GEN_TH_RED(th_vredminu_vs_b, uint8_t, uint8_t, H1, H1, TH_MIN, clearb_th)
+GEN_TH_RED(th_vredminu_vs_h, uint16_t, uint16_t, H2, H2, TH_MIN, clearh_th)
+GEN_TH_RED(th_vredminu_vs_w, uint32_t, uint32_t, H4, H4, TH_MIN, clearl_th)
+GEN_TH_RED(th_vredminu_vs_d, uint64_t, uint64_t, H8, H8, TH_MIN, clearq_th)
+
+/* vd[0] = min(vs1[0], unmasked vs2[vstart..vl-1]) */
+GEN_TH_RED(th_vredmin_vs_b, int8_t, int8_t, H1, H1, TH_MIN, clearb_th)
+GEN_TH_RED(th_vredmin_vs_h, int16_t, int16_t, H2, H2, TH_MIN, clearh_th)
+GEN_TH_RED(th_vredmin_vs_w, int32_t, int32_t, H4, H4, TH_MIN, clearl_th)
+GEN_TH_RED(th_vredmin_vs_d, int64_t, int64_t, H8, H8, TH_MIN, clearq_th)
+
+/* vd[0] = and(vs1[0], unmasked vs2[vstart..vl-1]) */
+GEN_TH_RED(th_vredand_vs_b, int8_t, int8_t, H1, H1, TH_AND, clearb_th)
+GEN_TH_RED(th_vredand_vs_h, int16_t, int16_t, H2, H2, TH_AND, clearh_th)
+GEN_TH_RED(th_vredand_vs_w, int32_t, int32_t, H4, H4, TH_AND, clearl_th)
+GEN_TH_RED(th_vredand_vs_d, int64_t, int64_t, H8, H8, TH_AND, clearq_th)
+
+/* vd[0] = or(vs1[0], unmasked vs2[vstart..vl-1]) */
+GEN_TH_RED(th_vredor_vs_b, int8_t, int8_t, H1, H1, TH_OR, clearb_th)
+GEN_TH_RED(th_vredor_vs_h, int16_t, int16_t, H2, H2, TH_OR, clearh_th)
+GEN_TH_RED(th_vredor_vs_w, int32_t, int32_t, H4, H4, TH_OR, clearl_th)
+GEN_TH_RED(th_vredor_vs_d, int64_t, int64_t, H8, H8, TH_OR, clearq_th)
+
+/* vd[0] = xor(vs1[0], unmasked vs2[vstart..vl-1]) */
+GEN_TH_RED(th_vredxor_vs_b, int8_t, int8_t, H1, H1, TH_XOR, clearb_th)
+GEN_TH_RED(th_vredxor_vs_h, int16_t, int16_t, H2, H2, TH_XOR, clearh_th)
+GEN_TH_RED(th_vredxor_vs_w, int32_t, int32_t, H4, H4, TH_XOR, clearl_th)
+GEN_TH_RED(th_vredxor_vs_d, int64_t, int64_t, H8, H8, TH_XOR, clearq_th)
-- 
2.44.0

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH 40/65] target/riscv: Add single-width floating-point fused multiply-add instructions for XTheadVector, (continued)
- [PATCH 40/65] target/riscv: Add single-width floating-point fused multiply-add instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 41/65] target/riscv: Add widening floating-point fused mul-add instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 42/65] target/riscv: Add floating-pointing square-root instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 43/65] target/riscv: Add floating-point MIN/MAX instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 44/65] target/riscv: Add floating-point sign-injection instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 45/65] target/riscv: Add floating-point compare instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 46/65] target/riscv: Add floating-point classify and merge instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 47/65] target/riscv: Add single-width floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 48/65] target/riscv: Add widening floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 49/65] target/riscv: Add narrowing floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector, Huang Tao <=
- [PATCH 51/65] target/riscv: Add widening integer reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 52/65] target/riscv: Add single-width floating-point reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 53/65] target/riscv: Add widening floating-point reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 54/65] target/riscv: Add mask-register logical instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 55/65] target/riscv: Add vector mask population count vmpopc for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 56/65] target/riscv: Add th.vmfirst.m for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 57/65] target/riscv: Add set-X-first mask bit instructrions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 58/65] target/riscv: Add vector iota instruction for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 59/65] target/riscv: Add vector element index instruction for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 60/65] target/riscv: Add integer extract and scalar move instructions for XTheadVector, Huang Tao, 2024/04/12

Prev by Date: [PATCH 49/65] target/riscv: Add narrowing floating-point/integer type-convert instructions for XTheadVector
Next by Date: [PATCH 51/65] target/riscv: Add widening integer reduction instructions for XTheadVector
Previous by thread: [PATCH 49/65] target/riscv: Add narrowing floating-point/integer type-convert instructions for XTheadVector
Next by thread: [PATCH 51/65] target/riscv: Add widening integer reduction instructions for XTheadVector
Index(es):
- Date
- Thread