qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 05/14] tcg/riscv: Implement vector load/store


From: LIU Zhiwei
Subject: Re: [PATCH v3 05/14] tcg/riscv: Implement vector load/store
Date: Tue, 10 Sep 2024 11:04:53 +0800
User-agent: Mozilla Thunderbird


On 2024/9/5 14:39, Richard Henderson wrote:
On 9/4/24 07:27, LIU Zhiwei wrote:
@@ -811,31 +849,52 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
      tcg_out_ext32s(s, ret, arg);
  }
  -static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
-                         TCGReg addr, intptr_t offset)
+static intptr_t split_offset_scalar(TCGContext *s, TCGReg *addr,
+                                    intptr_t offset)
  {
      intptr_t imm12 = sextreg(offset, 0, 12);
        if (offset != imm12) {
          intptr_t diff = tcg_pcrel_diff(s, (void *)offset);
  -        if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
+        if (*addr == TCG_REG_ZERO && diff == (int32_t)diff) {
              imm12 = sextreg(diff, 0, 12);
              tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP2, diff - imm12);
          } else {
              tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
-            if (addr != TCG_REG_ZERO) {
-                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, addr);
+            if (*addr != TCG_REG_ZERO) {
+                tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP2, TCG_REG_TMP2, *addr);
              }
          }
-        addr = TCG_REG_TMP2;
+        *addr = TCG_REG_TMP2;
+    }
+    return imm12;
+}
+
+static void split_offset_vector(TCGContext *s, TCGReg *addr, intptr_t offset)
+{
+    if (offset != 0) {
+        if (offset == sextreg(offset, 0, 12)) {
+            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, *addr, offset);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
+            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, *addr);
+        }
+        *addr = TCG_REG_TMP0;
      }
+}
+
+static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
+                         TCGReg addr, intptr_t offset)
+{
+    intptr_t imm12;
        switch (opc) {
      case OPC_SB:
      case OPC_SH:
      case OPC_SW:
      case OPC_SD:
+        imm12 = split_offset_scalar(s, &addr, offset);
          tcg_out_opc_store(s, opc, addr, data, imm12);
          break;
      case OPC_LB:
@@ -845,8 +904,31 @@ static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
      case OPC_LW:
      case OPC_LWU:
      case OPC_LD:
+        imm12 = split_offset_scalar(s, &addr, offset);
          tcg_out_opc_imm(s, opc, data, addr, imm12);
          break;
+    case OPC_VSE8_V:
+    case OPC_VSE16_V:
+    case OPC_VSE32_V:
+    case OPC_VSE64_V:
+    case OPC_VS1R_V:
+    case OPC_VS2R_V:
+    case OPC_VS4R_V:
+    case OPC_VS8R_V:
+        split_offset_vector(s, &addr, offset);
+        tcg_out_opc_ldst_vec(s, opc, data, addr, true);
+        break;
+    case OPC_VLE8_V:
+    case OPC_VLE16_V:
+    case OPC_VLE32_V:
+    case OPC_VLE64_V:
+    case OPC_VL1RE64_V:
+    case OPC_VL2RE64_V:
+    case OPC_VL4RE64_V:
+    case OPC_VL8RE64_V:
+        split_offset_vector(s, &addr, offset);
+        tcg_out_opc_ldst_vec(s, opc, data, addr, true);
+        break;
      default:
          g_assert_not_reached();
      }

This is more complicated than it needs to be, calling a combined function, then using a switch to separate, then calling separate functions.  Calling separate functions in the first place is simpler.  E.g.

/*
 * Emit a single vector load/store instruction OPC that transfers vector
 * register DATA to/from the address ADDR + OFFSET.
 *
 * The emitted instruction word has no offset field, so a non-zero OFFSET
 * is first folded into the base address using TCG_REG_TMP0 as scratch.
 *
 * @s:      code generation context the instruction(s) are emitted into
 * @opc:    base opcode bits of the vector load/store to emit
 * @data:   vector register operand (asserted to be >= TCG_REG_V0)
 * @addr:   base address register (asserted to be < TCG_REG_V0)
 * @offset: byte offset to add to @addr; may be any intptr_t value
 */
static void tcg_out_vec_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
                             TCGReg addr, intptr_t offset)
{
    /* DATA must be a vector register and ADDR must not be one. */
    tcg_debug_assert(data >= TCG_REG_V0);
    tcg_debug_assert(addr < TCG_REG_V0);

    if (offset) {
        /* A non-zero offset from the zero register is not expected here. */
        tcg_debug_assert(addr != TCG_REG_ZERO);
        if (offset == sextreg(offset, 0, 12)) {
            /* Offset fits a sign-extended 12-bit immediate: one ADDI. */
            tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP0, addr, offset);
        } else {
            /* Otherwise materialize the offset, then add the base to it. */
            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, offset);
            tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, addr);
        }
        /* From here on the effective address lives in the scratch register. */
        addr = TCG_REG_TMP0;
    }

    /*
     * Assemble the instruction word: low 5 bits of DATA at bit 7, ADDR at
     * bit 15, and bit 25 set.
     * NOTE(review): bit 25 is presumably the RVV "vm" field (1 = unmasked),
     * matching the 'true' passed to tcg_out_opc_ldst_vec() in the patch
     * under review -- confirm against the vector spec.
     */
    tcg_out32(s, opc | ((data & 0x1f) << 7) | (addr << 15) | (1 << 25));
}

  static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
                         TCGReg arg1, intptr_t arg2)
  {
-    RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_LW : OPC_LD;
+    RISCVInsn insn;
+
+    if (type < TCG_TYPE_V64) {
+        insn = (type == TCG_TYPE_I32) ? OPC_LW : OPC_LD;
+    } else {
+        int nf = get_vec_type_bytes(type) / riscv_vlenb;
+
+        switch (nf) {
+        case 1:
+            insn = OPC_VL1RE64_V;
+            break;
+        case 2:
+            insn = OPC_VL2RE64_V;
+            break;
+        case 4:
+            insn = OPC_VL4RE64_V;
+            break;
+        case 8:
+            insn = OPC_VL8RE64_V;
+            break;
+        default:
+            {
+                int prev_vsew = riscv_set_vec_config_vl(s, type);
+
+                switch (prev_vsew) {
+                case MO_8:
+                    insn = OPC_VLE8_V;
+                    break;
+                case MO_16:
+                    insn = OPC_VLE16_V;
+                    break;
+                case MO_32:
+                    insn = OPC_VLE32_V;
+                    break;
+                case MO_64:
+                    insn = OPC_VLE64_V;
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
+            }
+            break;

This can be simplified:

    /*
     * Dispatch on the TCG type: scalar types go straight to tcg_out_ldst(),
     * vector types select an opcode and go to tcg_out_vec_ldst().
     * 'insn', 'data', 'base' and 'offset' are declared by the enclosing
     * function, which is not shown in this excerpt.
     */
    switch (type) {
    case TCG_TYPE_I32:
        tcg_out_ldst(s, OPC_LW, data, base, offset);
        break;
    case TCG_TYPE_I64:
        tcg_out_ldst(s, OPC_LD, data, base, offset);
        break;
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        /*
         * NOTE(review): comparing 'type' with riscv_lg2_vlenb presumably
         * relies on the TCG_TYPE_V* enumerators being numerically equal to
         * log2 of the vector size in bytes -- confirm against the TCGType
         * definition.
         */
        if (type >= riscv_lg2_vlenb) {
            /* Whole-register load; table index is type - riscv_lg2_vlenb. */
            static const RISCVInsn whole_reg_ld[] = {
                OPC_VL1RE64_V, OPC_VL2RE64_V, OPC_VL4RE64_V, OPC_VL8RE64_V
            };
            unsigned idx = type - riscv_lg2_vlenb;
            insn = whole_reg_ld[idx];
        } else {
            /*
             * Unit-stride load; the opcode is chosen by the MemOp value
             * returned from set_vtype_len(), used directly as table index.
             */
            static const RISCVInsn unit_stride_ld[] = {
                OPC_VLE8_V, OPC_VLE16_V, OPC_VLE32_V, OPC_VLE64_V
            };
            MemOp prev_vsew = set_vtype_len(s, type);
            insn = unit_stride_ld[prev_vsew];
        }
        tcg_out_vec_ldst(s, insn, data, base, offset);
        break;
    default:
        g_assert_not_reached();
    }

and similar for store.

Great. We will do it this way.

Zhiwei



r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]