On 9/4/24 07:27, LIU Zhiwei wrote:
@@ -811,31 +849,52 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
     tcg_out_ext32s(s, ret, arg);
 }

-static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
-                         TCGReg addr, intptr_t offset)
+static intptr_t split_offset_scalar(TCGContext *s, TCGReg *addr,
+                                    intptr_t offset)
 {
     intptr_t imm12 = sextreg(offset, 0, 12);

     if (offset != imm12) {
         intptr_t diff = tcg_pcrel_diff(s, (void *)offset);

-        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
+        if (*addr == TCG_REG_ZERO && diff == (int32_t)diff) {
             imm12 = sextreg(diff, 0, 12);
             tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
         } else {
             tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
-            if (addr != TCG_REG_ZERO) {
-                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
+            if (*addr != TCG_REG_ZERO) {
+                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, *addr);
             }
         }
-        addr = TCG_REG_TMP2;
+        *addr = TCG_REG_TMP2;
+    }
+    return imm12;
+}
+
+static void split_offset_vector(TCGContext *s, TCGReg *addr, intptr_t offset)
+{
+    if (offset != 0) {
+        if (offset == sextreg(offset, 0, 12)) {
+            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, *addr, offset);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, *addr);
+        }
+        *addr = TCG_REG_TMP0;
     }
+}
+
+static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
+                         TCGReg addr, intptr_t offset)
+{
+    intptr_t imm12;

     switch (opc) {
     case OPC_SB:
     case OPC_SH:
     case OPC_SW:
     case OPC_SD:
+        imm12 = split_offset_scalar(s, &addr, offset);
         tcg_out_opc_store(s, opc, addr, data, imm12);
         break;
     case OPC_LB:
@@ -845,8 +904,31 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
     case OPC_LW:
     case OPC_LWU:
     case OPC_LD:
+        imm12 = split_offset_scalar(s, &addr, offset);
         tcg_out_opc_imm(s, opc, data, addr, imm12);
         break;
+    case OPC_VSE8_V:
+    case OPC_VSE16_V:
+    case OPC_VSE32_V:
+    case OPC_VSE64_V:
+    case OPC_VS1R_V:
+    case OPC_VS2R_V:
+    case OPC_VS4R_V:
+    case OPC_VS8R_V:
+        split_offset_vector(s, &addr, offset);
+        tcg_out_opc_ldst_vec(s, opc, data, addr, true);
+        break;
+    case OPC_VLE8_V:
+    case OPC_VLE16_V:
+    case OPC_VLE32_V:
+    case OPC_VLE64_V:
+    case OPC_VL1RE64_V:
+    case OPC_VL2RE64_V:
+    case OPC_VL4RE64_V:
+    case OPC_VL8RE64_V:
+        split_offset_vector(s, &addr, offset);
+        tcg_out_opc_ldst_vec(s, opc, data, addr, true);
+        break;
     default:
         g_assert_not_reached();
     }
This is more complicated than it needs to be: calling a combined
function, then using a switch to separate the cases, then calling
separate functions anyway. Calling the separate functions in the
first place is simpler. E.g.
static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                             TCGReg addr, intptr_t offset)
{
    tcg_debug_assert(data >= TCG_REG_V0);
    tcg_debug_assert(addr < TCG_REG_V0);

    if (offset) {
        tcg_debug_assert(addr != TCG_REG_ZERO);
        if (offset == sextreg(offset, 0, 12)) {
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
        } else {
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
        }
        addr = TCG_REG_TMP0;
    }

    /* vd/vs3 in bits [11:7], rs1 in [19:15], vm=1 (unmasked) in bit 25. */
    tcg_out32(s, opc | ((data & 0x1f) << 7) | (addr << 15) | (1 << 25));
}
 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                        TCGReg arg1, intptr_t arg2)
 {
-    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_LW : OPC_LD;
+    RISCVInsn insn;
+
+    if (type < TCG_TYPE_V64) {
+        insn = (type == TCG_TYPE_I32) ? OPC_LW : OPC_LD;
+    } else {
+        int nf = get_vec_type_bytes(type) / riscv_vlenb;
+
+        switch (nf) {
+        case 1:
+            insn = OPC_VL1RE64_V;
+            break;
+        case 2:
+            insn = OPC_VL2RE64_V;
+            break;
+        case 4:
+            insn = OPC_VL4RE64_V;
+            break;
+        case 8:
+            insn = OPC_VL8RE64_V;
+            break;
+        default:
+        {
+            int prev_vsew = riscv_set_vec_config_vl(s, type);
+
+            switch (prev_vsew) {
+            case MO_8:
+                insn = OPC_VLE8_V;
+                break;
+            case MO_16:
+                insn = OPC_VLE16_V;
+                break;
+            case MO_32:
+                insn = OPC_VLE32_V;
+                break;
+            case MO_64:
+                insn = OPC_VLE64_V;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+        }
+        break;
This can be simplified:
    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_LW, data, base, offset);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_LD, data, base, offset);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            static const RISCVInsn whole_reg_ld[] = {
                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
            };
            unsigned idx = type - riscv_lg2_vlenb;

            insn = whole_reg_ld[idx];
        } else {
            static const RISCVInsn unit_stride_ld[] = {
                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);

            insn = unit_stride_ld[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, data, base, offset);
        break;
    default:
        g_assert_not_reached();
    }
and similar for store.
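For concreteness, a minimal sketch of what the matching switch inside
the store path could look like under the same scheme. The whole_reg_st
and unit_stride_st table names here are illustrative, not from the
patch; the opcodes and helpers (set_vtype_len, tcg_out_vec_ldst,
riscv_lg2_vlenb) are the ones used above.

    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_SW, data, base, offset);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_SD, data, base, offset);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        if (type >= riscv_lg2_vlenb) {
            /* Illustrative table name; whole-register stores have no EEW. */
            static const RISCVInsn whole_reg_st[] = {
                OPC_VS1R_V, OPC_VS2R_V, OPC_VS4R_V, OPC_VS8R_V
            };
            insn = whole_reg_st[type - riscv_lg2_vlenb];
        } else {
            /* Illustrative table name; indexed by MemOp, MO_8 .. MO_64. */
            static const RISCVInsn unit_stride_st[] = {
                OPC_VSE8_V, OPC_VSE16_V, OPC_VSE32_V, OPC_VSE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);

            insn = unit_stride_st[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, data, base, offset);
        break;
    default:
        g_assert_not_reached();
    }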