[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 60/65] target/riscv: Add integer extract and scalar move instruct
From: |
Huang Tao |
Subject: |
[PATCH 60/65] target/riscv: Add integer extract and scalar move instructions for XTheadVector |
Date: |
Fri, 12 Apr 2024 15:37:30 +0800 |
In this patch, we add integer extract and scalar move instructions to demonstrate
how we implement the XTheadVector permutation instructions.
The XTheadVector integer extract and scalar move instructions differ from RVV1.0
in the following points:
1. th.vext.x.v can transfer any element in a vector register to a general
register, while vmv.x.s can only transfer the first element in a vector
register to a general register.
2. When SEW < XLEN, XTheadVector zero-extends the value, while RVV1.0
sign-extends the value.
3. Different tail element processing policy.
Signed-off-by: Huang Tao <eric.huang@linux.alibaba.com>
---
.../riscv/insn_trans/trans_xtheadvector.c.inc | 154 +++++++++++++++++-
1 file changed, 152 insertions(+), 2 deletions(-)
diff --git a/target/riscv/insn_trans/trans_xtheadvector.c.inc
b/target/riscv/insn_trans/trans_xtheadvector.c.inc
index 9a0ea606ab..a8a1ec7b3f 100644
--- a/target/riscv/insn_trans/trans_xtheadvector.c.inc
+++ b/target/riscv/insn_trans/trans_xtheadvector.c.inc
@@ -2588,14 +2588,164 @@ static bool trans_th_vid_v(DisasContext *s,
arg_th_vid_v *a)
return false;
}
+/*
+ * Vector Permutation Instructions
+ */
+
+/* Integer Extract Instruction */
+
+/*
+ * This function is almost the copy of load_element, except:
+ * 1) When SEW < XLEN, XTheadVector zero-extend the value, while
+ * RVV1.0 sign-extend the value.
+ */
+/*
+ * Load one element of width 1 << sew from host offset ofs off base
+ * into dest. Every case uses an unsigned (zero-extending) load, which
+ * is the XTheadVector behavior when SEW < XLEN.
+ */
+static void load_element_th(TCGv_i64 dest, TCGv_ptr base,
+ int ofs, int sew)
+{
+ switch (sew) {
+ case MO_8:
+ tcg_gen_ld8u_i64(dest, base, ofs);
+ break;
+ case MO_16:
+ tcg_gen_ld16u_i64(dest, base, ofs);
+ break;
+ case MO_32:
+ tcg_gen_ld32u_i64(dest, base, ofs);
+ break;
+ case MO_64:
+ tcg_gen_ld_i64(dest, base, ofs);
+ break;
+ default:
+ /* sew comes from s->sew and is always MO_8..MO_64. */
+ g_assert_not_reached();
+ break;
+ }
+}
+
+/*
+ * Load vreg[idx] into dest, where idx is a run-time value;
+ * out-of-range indices yield zero: dest = idx >= vlmax ? 0 : vreg[idx].
+ */
+static void th_element_loadx(DisasContext *s, TCGv_i64 dest,
+ int vreg, TCGv idx, int vlmax)
+{
+ TCGv_i32 ofs = tcg_temp_new_i32();
+ TCGv_ptr base = tcg_temp_new_ptr();
+ TCGv_i64 t_idx = tcg_temp_new_i64();
+ TCGv_i64 t_vlmax, t_zero;
+
+ /*
+ * Mask the index to the length so that we do
+ * not produce an out-of-range load.
+ * NOTE(review): "& (vlmax - 1)" is only a modulo when vlmax is a
+ * power of two -- true for the caller's vlenb >> sew, but confirm
+ * for any new caller.
+ */
+ tcg_gen_trunc_tl_i32(ofs, idx);
+ tcg_gen_andi_i32(ofs, ofs, vlmax - 1);
+
+ /* Convert the index to an offset (element index -> byte offset). */
+ endian_adjust(ofs, s->sew);
+ tcg_gen_shli_i32(ofs, ofs, s->sew);
+
+ /* Convert the index to a pointer into the vector register file. */
+ tcg_gen_ext_i32_ptr(base, ofs);
+ tcg_gen_add_ptr(base, base, tcg_env);
+
+ /* Perform the load (always zero-extending; see load_element_th). */
+ load_element_th(dest, base,
+ vreg_ofs(s, vreg), s->sew);
+
+ /*
+ * Flush out-of-range indexing to zero: the masked load above read
+ * some in-range element, so replace the result with 0 whenever the
+ * unmasked index was >= vlmax.
+ */
+ t_vlmax = tcg_constant_i64(vlmax);
+ t_zero = tcg_constant_i64(0);
+ tcg_gen_extu_tl_i64(t_idx, idx);
+
+ tcg_gen_movcond_i64(TCG_COND_LTU, dest, t_idx,
+ t_vlmax, dest, t_zero);
+
+}
+/*
+ * Load vreg[idx] into dest for a compile-time-constant idx.
+ *
+ * This function is almost the copy of vec_element_loadi, except
+ * we just change the function name to decouple and delete the
+ * unused parameter.
+ * We delete the arg "bool sign", because XTheadVector always
+ * zero-extends the value.
+ */
+static void th_element_loadi(DisasContext *s, TCGv_i64 dest,
+ int vreg, int idx)
+{
+ load_element_th(dest, tcg_env, endian_ofs(s, vreg, idx), s->sew);
+}
+
+/*
+ * th.vext.x.v rd, vs2, rs1  # rd = vs2[rs1] (zero-extended)
+ *
+ * Compared to trans_vmv_x_s, th.vext.x.v can transfer any element
+ * in a vector register to a general register, while vmv.x.s can only
+ * transfer the first element in a vector register to a general register.
+ *
+ * So we use th_element_loadx to load the element. And we use th_element_loadi
+ * to deal with the special case when rs1 == 0, to accelerate.
+ */
+static bool trans_th_vext_x_v(DisasContext *s, arg_r *a)
+{
+ if (require_xtheadvector(s) &&
+ vext_check_isa_ill(s)) {
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ TCGv dest = dest_gpr(s, a->rd);
+
+ if (a->rs1 == 0) {
+ /* rs1 == x0: index is constant 0, behaves like vmv.x.s rd, vs2. */
+ th_element_loadi(s, tmp, a->rs2, 0);
+ } else {
+ /* This instruction ignores LMUL and vector register groups */
+ int vlmax = s->cfg_ptr->vlenb >> s->sew;
+ th_element_loadx(s, tmp, a->rs2, cpu_gpr[a->rs1], vlmax);
+ }
+
+ tcg_gen_trunc_i64_tl(dest, tmp);
+ gen_set_gpr(s, a->rd, dest);
+ tcg_gen_movi_tl(cpu_vstart, 0);
+ finalize_rvv_inst(s);
+ return true;
+ }
+ return false;
+}
+
+/* Integer Scalar Move Instruction */
+
+/*
+ * Store val into vreg[idx] for a compile-time-constant idx.
+ * Thin wrapper over the RVV helper to keep XTheadVector code decoupled.
+ */
+static void th_element_storei(DisasContext *s, int vreg,
+ int idx, TCGv_i64 val)
+{
+ vec_element_storei(s, vreg, idx, val);
+}
+/* vmv.s.x vd, rs1 # vd[0] = rs1 */
+static bool trans_th_vmv_s_x(DisasContext *s, arg_th_vmv_s_x *a)
+{
+ if (require_xtheadvector(s) &&
+ vext_check_isa_ill(s)) {
+ /* This instruction ignores LMUL and vector register groups */
+ int maxsz = s->cfg_ptr->vlenb;
+ TCGv_i64 t1;
+ TCGLabel *over = gen_new_label();
+ TCGv src1 = get_gpr(s, a->rs1, EXT_ZERO);
+
+ /* If vstart >= vl, skip the write but still fall through to clear vstart. */
+ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);
+ /*
+ * Zero the whole destination register first, so elements other
+ * than vd[0] end up zero -- presumably the XTheadVector (RVV 0.7)
+ * tail policy; confirm against the spec.
+ */
+ tcg_gen_gvec_dup_imm(MO_64, vreg_ofs(s, a->rd), maxsz, maxsz, 0);
+ if (a->rs1 == 0) {
+ /* rs1 == x0 is always zero; the register is already zeroed. */
+ goto done;
+ }
+
+ t1 = tcg_temp_new_i64();
+ tcg_gen_extu_tl_i64(t1, src1);
+ th_element_storei(s, a->rd, 0, t1);
+ done:
+ gen_set_label(over);
+ tcg_gen_movi_tl(cpu_vstart, 0);
+ finalize_rvv_inst(s);
+ return true;
+ }
+ return false;
+}
+
/*
 * Placeholder translator: accept the instruction whenever XTheadVector
 * is enabled, without generating any code yet. Used for instructions
 * whose real implementation lands in a later patch.
 */
#define TH_TRANS_STUB(NAME) \
static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
{ \
return require_xtheadvector(s); \
}
-TH_TRANS_STUB(th_vext_x_v)
-TH_TRANS_STUB(th_vmv_s_x)
TH_TRANS_STUB(th_vfmv_f_s)
TH_TRANS_STUB(th_vfmv_s_f)
TH_TRANS_STUB(th_vslideup_vx)
--
2.44.0
- [PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector, (continued)
- [PATCH 50/65] target/riscv: Add single-width integer reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 51/65] target/riscv: Add widening integer reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 52/65] target/riscv: Add single-width floating-point reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 53/65] target/riscv: Add widening floating-point reduction instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 54/65] target/riscv: Add mask-register logical instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 55/65] target/riscv: Add vector mask population count vmpopc for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 56/65] target/riscv: Add th.vmfirst.m for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 57/65] target/riscv: Add set-X-first mask bit instructrions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 58/65] target/riscv: Add vector iota instruction for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 59/65] target/riscv: Add vector element index instruction for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 60/65] target/riscv: Add integer extract and scalar move instructions for XTheadVector,
Huang Tao <=
- [PATCH 61/65] target/riscv: Add floating-point scalar move instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 62/65] target/riscv: Add vector slide instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 63/65] target/riscv: Add vector register gather instructions for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 64/65] target/riscv: Add vector compress instruction for XTheadVector, Huang Tao, 2024/04/12
- [PATCH 65/65] target/riscv: Enable XTheadVector extension for c906, Huang Tao, 2024/04/12