[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 50/65] target/riscv: Add single-width integer reduction instructi
From: |
Huang Tao |
Subject: |
[PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector |
Date: |
Fri, 12 Apr 2024 15:37:20 +0800 |
In this patch, we add single-width integer reduction instructions to show
the way we implement XTheadVector reduction instructions.
XTheadVector single-width integer reduction instructions differ from RVV1.0
in the following points:
1. Different mask register layout. For the mask bit of element i, XTheadVector
locates it in bit[mlen * i], while RVV1.0 locates it in bit[i].
2. Different tail-element processing policy. XTheadVector clears the tail
elements, while RVV1.0 has vta to select the policy: either keep the values or
overwrite them with 1s.
3. Different check policy. XTheadVector does not have fractional LMUL, so we can
use a simpler check function.
Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com>
---
target/riscv/helper.h | 33 ++++++++
.../riscv/insn_trans/trans_xtheadvector.c.inc | 27 +++++--
target/riscv/xtheadvector_helper.c | 76 +++++++++++++++++++
3 files changed, 128 insertions(+), 8 deletions(-)
diff --git a/target/riscv/helper.h b/target/riscv/helper.h
index c666a5a020..84d2921945 100644
--- a/target/riscv/helper.h
+++ b/target/riscv/helper.h
@@ -2236,3 +2236,36 @@ DEF_HELPER_5(th_vfncvt_f_x_v_h, void, ptr, ptr, ptr,
env, i32)
DEF_HELPER_5(th_vfncvt_f_x_v_w, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(th_vfncvt_f_f_v_h, void, ptr, ptr, ptr, env, i32)
DEF_HELPER_5(th_vfncvt_f_f_v_w, void, ptr, ptr, ptr, env, i32)
+
+DEF_HELPER_6(th_vredsum_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredsum_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredsum_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredsum_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmaxu_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmaxu_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmaxu_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmaxu_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmax_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmax_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmax_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmax_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredminu_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredminu_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredminu_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredminu_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmin_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmin_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmin_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredmin_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredand_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredand_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredand_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredand_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredor_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredor_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredor_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredor_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredxor_vs_b, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredxor_vs_h, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredxor_vs_w, void, ptr, ptr, ptr, ptr, env, i32)
+DEF_HELPER_6(th_vredxor_vs_d, void, ptr, ptr, ptr, ptr, env, i32)
diff --git a/target/riscv/insn_trans/trans_xtheadvector.c.inc
b/target/riscv/insn_trans/trans_xtheadvector.c.inc
index d2734c007a..1fd66353ed 100644
--- a/target/riscv/insn_trans/trans_xtheadvector.c.inc
+++ b/target/riscv/insn_trans/trans_xtheadvector.c.inc
@@ -2374,20 +2374,31 @@ GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_xu_v)
GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_x_v)
GEN_OPFV_NARROW_TRANS_TH(th_vfncvt_f_f_v)
+/*
+ * Vector Reduction Operations
+ */
+
+/*
+ * Vector Single-Width Integer Reduction Instructions
+ *
+ * reduction_check_th() is the translate-time check handed to
+ * GEN_OPIVV_TRANS_TH for each th_vred*_vs instruction. It returns true only
+ * when both vext_check_isa_ill(s) and th_check_reg(s, a->rs2, false)
+ * succeed. Only the vs2 register number is checked here; a->rd and a->rs1
+ * are not passed to th_check_reg().
+ *
+ * NOTE(review): the TH_TRANS_STUB bodies this replaces returned
+ * require_xtheadvector(s); this check does not call it directly -- confirm
+ * the extension gate is still enforced (e.g. inside GEN_OPIVV_TRANS_TH).
+ */
+static bool reduction_check_th(DisasContext *s, arg_rmrr *a)
+{
+ return vext_check_isa_ill(s) && th_check_reg(s, a->rs2, false);
+}
+
+GEN_OPIVV_TRANS_TH(th_vredsum_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredmaxu_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredmax_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredminu_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredmin_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredand_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredor_vs, reduction_check_th)
+GEN_OPIVV_TRANS_TH(th_vredxor_vs, reduction_check_th)
+
#define TH_TRANS_STUB(NAME) \
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
{ \
return require_xtheadvector(s); \
}
-TH_TRANS_STUB(th_vredsum_vs)
-TH_TRANS_STUB(th_vredand_vs)
-TH_TRANS_STUB(th_vredor_vs)
-TH_TRANS_STUB(th_vredxor_vs)
-TH_TRANS_STUB(th_vredminu_vs)
-TH_TRANS_STUB(th_vredmin_vs)
-TH_TRANS_STUB(th_vredmaxu_vs)
-TH_TRANS_STUB(th_vredmax_vs)
TH_TRANS_STUB(th_vwredsumu_vs)
TH_TRANS_STUB(th_vwredsum_vs)
TH_TRANS_STUB(th_vfredsum_vs)
diff --git a/target/riscv/xtheadvector_helper.c
b/target/riscv/xtheadvector_helper.c
index 3a7512ecd8..d041a81150 100644
--- a/target/riscv/xtheadvector_helper.c
+++ b/target/riscv/xtheadvector_helper.c
@@ -3323,3 +3323,79 @@ THCALL(TH_OPFVV1, th_vfncvt_f_f_v_h, NOP_UU_H, H2, H4,
vfncvtffv16)
THCALL(TH_OPFVV1, th_vfncvt_f_f_v_w, NOP_UU_W, H4, H8, float64_to_float32)
GEN_TH_V_ENV(th_vfncvt_f_f_v_h, 2, 2, clearh_th)
GEN_TH_V_ENV(th_vfncvt_f_f_v_w, 4, 4, clearl_th)
+
+/*
+ * Vector Reduction Operations
+ */
+
+/*
+ * Vector Single-Width Integer Reduction Instructions
+ *
+ * GEN_TH_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN) defines HELPER(NAME):
+ *   NAME     - helper name
+ *   TD       - C type of the accumulator and of vd[0] / vs1[0]
+ *   TS2      - C type of each vs2 element (cast to TD before OP)
+ *   HD, HS2  - index macros applied to the vd/vs1 and vs2 element indexes
+ *   OP       - binary operation folded over the elements, OP(acc, elem)
+ *   CLEAR_FN - called last as CLEAR_FN(vd, 1, sizeof(TD), tot) with
+ *              tot = cfg.vlenb; presumably zeroes vd past element 0
+ *              (tail-clearing policy) -- TODO confirm against its definition
+ *
+ * The accumulator starts as vs1[0]. Elements vs2[i] for i in
+ * [env->vstart, env->vl) are folded in with OP; when vm is 0, elements
+ * whose th_elem_mask(v0, mlen, i) is 0 are skipped. The result is stored
+ * to vd[0] and env->vstart is reset to 0. If no element is folded in,
+ * vd[0] receives vs1[0] unchanged.
+ */
+#define GEN_TH_RED(NAME, TD, TS2, HD, HS2, OP, CLEAR_FN) \
+void HELPER(NAME)(void *vd, void *v0, void *vs1, \
+ void *vs2, CPURISCVState *env, uint32_t desc) \
+{ \
+ uint32_t mlen = th_mlen(desc); \
+ uint32_t vm = th_vm(desc); \
+ uint32_t vl = env->vl; \
+ uint32_t i; \
+ uint32_t tot = env_archcpu(env)->cfg.vlenb; \
+ TD s1 = *((TD *)vs1 + HD(0)); \
+ \
+ for (i = env->vstart; i < vl; i++) { \
+ TS2 s2 = *((TS2 *)vs2 + HS2(i)); \
+ if (!vm && !th_elem_mask(v0, mlen, i)) { \
+ continue; \
+ } \
+ s1 = OP(s1, (TD)s2); \
+ } \
+ *((TD *)vd + HD(0)) = s1; \
+ env->vstart = 0; \
+ CLEAR_FN(vd, 1, sizeof(TD), tot); \
+}
+
+/* vd[0] = sum(vs1[0], vs2[*]) */
+GEN_TH_RED(th_vredsum_vs_b, int8_t, int8_t, H1, H1, TH_ADD, clearb_th)
+GEN_TH_RED(th_vredsum_vs_h, int16_t, int16_t, H2, H2, TH_ADD, clearh_th)
+GEN_TH_RED(th_vredsum_vs_w, int32_t, int32_t, H4, H4, TH_ADD, clearl_th)
+GEN_TH_RED(th_vredsum_vs_d, int64_t, int64_t, H8, H8, TH_ADD, clearq_th)
+
+/* vd[0] = maxu(vs1[0], vs2[*]) */
+GEN_TH_RED(th_vredmaxu_vs_b, uint8_t, uint8_t, H1, H1, TH_MAX, clearb_th)
+GEN_TH_RED(th_vredmaxu_vs_h, uint16_t, uint16_t, H2, H2, TH_MAX, clearh_th)
+GEN_TH_RED(th_vredmaxu_vs_w, uint32_t, uint32_t, H4, H4, TH_MAX, clearl_th)
+GEN_TH_RED(th_vredmaxu_vs_d, uint64_t, uint64_t, H8, H8, TH_MAX, clearq_th)
+
+/* vd[0] = max(vs1[0], vs2[*]) */
+GEN_TH_RED(th_vredmax_vs_b, int8_t, int8_t, H1, H1, TH_MAX, clearb_th)
+GEN_TH_RED(th_vredmax_vs_h, int16_t, int16_t, H2, H2, TH_MAX, clearh_th)
+GEN_TH_RED(th_vredmax_vs_w, int32_t, int32_t, H4, H4, TH_MAX, clearl_th)
+GEN_TH_RED(th_vredmax_vs_d, int64_t, int64_t, H8, H8, TH_MAX, clearq_th)
+
+/* vd[0] = minu(vs1[0], vs2[*]) */
+GEN_TH_RED(th_vredminu_vs_b, uint8_t, uint8_t, H1, H1, TH_MIN, clearb_th)
+GEN_TH_RED(th_vredminu_vs_h, uint16_t, uint16_t, H2, H2, TH_MIN, clearh_th)
+GEN_TH_RED(th_vredminu_vs_w, uint32_t, uint32_t, H4, H4, TH_MIN, clearl_th)
+GEN_TH_RED(th_vredminu_vs_d, uint64_t, uint64_t, H8, H8, TH_MIN, clearq_th)
+
+/* vd[0] = min(vs1[0], vs2[*]) */
+GEN_TH_RED(th_vredmin_vs_b, int8_t, int8_t, H1, H1, TH_MIN, clearb_th)
+GEN_TH_RED(th_vredmin_vs_h, int16_t, int16_t, H2, H2, TH_MIN, clearh_th)
+GEN_TH_RED(th_vredmin_vs_w, int32_t, int32_t, H4, H4, TH_MIN, clearl_th)
+GEN_TH_RED(th_vredmin_vs_d, int64_t, int64_t, H8, H8, TH_MIN, clearq_th)
+
+/* vd[0] = and(vs1[0], vs2[*]) */
+GEN_TH_RED(th_vredand_vs_b, int8_t, int8_t, H1, H1, TH_AND, clearb_th)
+GEN_TH_RED(th_vredand_vs_h, int16_t, int16_t, H2, H2, TH_AND, clearh_th)
+GEN_TH_RED(th_vredand_vs_w, int32_t, int32_t, H4, H4, TH_AND, clearl_th)
+GEN_TH_RED(th_vredand_vs_d, int64_t, int64_t, H8, H8, TH_AND, clearq_th)
+
+/* vd[0] = or(vs1[0], vs2[*]) */
+GEN_TH_RED(th_vredor_vs_b, int8_t, int8_t, H1, H1, TH_OR, clearb_th)
+GEN_TH_RED(th_vredor_vs_h, int16_t, int16_t, H2, H2, TH_OR, clearh_th)
+GEN_TH_RED(th_vredor_vs_w, int32_t, int32_t, H4, H4, TH_OR, clearl_th)
+GEN_TH_RED(th_vredor_vs_d, int64_t, int64_t, H8, H8, TH_OR, clearq_th)
+
+/* vd[0] = xor(vs1[0], vs2[*]) */
+GEN_TH_RED(th_vredxor_vs_b, int8_t, int8_t, H1, H1, TH_XOR, clearb_th)
+GEN_TH_RED(th_vredxor_vs_h, int16_t, int16_t, H2, H2, TH_XOR, clearh_th)
+GEN_TH_RED(th_vredxor_vs_w, int32_t, int32_t, H4, H4, TH_XOR, clearl_th)
+GEN_TH_RED(th_vredxor_vs_d, int64_t, int64_t, H8, H8, TH_XOR, clearq_th)
--
2.44.0
- [PATCH 40/65] target/riscv: Add single-width floating-point fused multiply-add instructions for XTheadVector, (continued)
- [PATCH 40/65] target/riscv: Add single-width floating-point fused multiply-add instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 41/65] target/riscv: Add widening floating-point fused mul-add instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 42/65] target/riscv: Add floating-pointing square-root instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 43/65] target/riscv: Add floating-point MIN/MAX instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 44/65] target/riscv: Add floating-point sign-injection instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 45/65] target/riscv: Add floating-point compare instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 46/65] target/riscv: Add floating-point classify and merge instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 47/65] target/riscv: Add single-width floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 48/65] target/riscv: Add widening floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 49/65] target/riscv: Add narrowing floating-point/integer type-convert instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector,
Huang Tao <=
- [PATCH 51/65] target/riscv: Add widening integer reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 52/65] target/riscv: Add single-width floating-point reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 53/65] target/riscv: Add widening floating-point reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 54/65] target/riscv: Add mask-register logical instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 55/65] target/riscv: Add vector mask population count vmpopc for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 56/65] target/riscv: Add th.vmfirst.m for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 57/65] target/riscv: Add set-X-first mask bit instructrions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 58/65] target/riscv: Add vector iota instruction for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 59/65] target/riscv: Add vector element index instruction for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 60/65] target/riscv: Add integer extract and scalar move instructions for XTheadVector, Huang Tao, 2024/04/12