qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 1/8] target/ppc: Optimize emulation of lvsl and lvsr


From: Stefan Brankovic
Subject: [Qemu-devel] [PATCH 1/8] target/ppc: Optimize emulation of lvsl and lvsr instructions
Date: Thu, 6 Jun 2019 12:15:23 +0200

Add a simple macro that calls the TCG implementation of the appropriate
instruction if Altivec support is enabled.

Optimization of the Altivec instruction lvsl (Load Vector for Shift Left).
Place bytes sh:sh+15 of the value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F
in the destination register. sh is calculated by adding the two source
registers and taking bits 60-63 of the result.

First we place bits [28-31] of EA (its four least significant bits, i.e.
bits 60-63 in 64-bit numbering) in variable sh. After that we create
bytes sh:(sh+7) of X (from the description) in a for loop (by
incrementing sh in each iteration and placing it in the appropriate byte
of variable result) and save them in the higher doubleword element of vD.
We then repeat this for the lower doubleword element of vD by creating
bytes (sh+8):(sh+15) in a for loop and saving the result.

Optimization of the Altivec instruction lvsr (Load Vector for Shift Right).
Place bytes 16-sh:31-sh of the value 0x00 || 0x01 || 0x02 || ... || 0x1E ||
0x1F in the destination register. sh is calculated by adding the two source
registers and taking bits 60-63 of the result.

First we place bits [28-31] of EA (its four least significant bits, i.e.
bits 60-63 in 64-bit numbering) in variable sh and replace it with 16-sh.
After that we create bytes (16-sh):(23-sh) of X (from the description) in
a for loop (by incrementing the working value in each iteration and
placing it in the appropriate byte of variable result) and save them in
the higher doubleword element of vD. We then repeat this for the lower
doubleword element of vD by creating bytes (24-sh):(31-sh) in a for loop
and saving the result.

Signed-off-by: Stefan Brankovic <address@hidden>
---
 target/ppc/translate/vmx-impl.inc.c | 143 ++++++++++++++++++++++++++++--------
 1 file changed, 111 insertions(+), 32 deletions(-)

diff --git a/target/ppc/translate/vmx-impl.inc.c 
b/target/ppc/translate/vmx-impl.inc.c
index bd3ff40..140bb05 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -142,38 +142,6 @@ GEN_VR_STVE(bx, 0x07, 0x04, 1);
 GEN_VR_STVE(hx, 0x07, 0x05, 2);
 GEN_VR_STVE(wx, 0x07, 0x06, 4);
 
-static void gen_lvsl(DisasContext *ctx)
-{
-    TCGv_ptr rd;
-    TCGv EA;
-    if (unlikely(!ctx->altivec_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VPU);
-        return;
-    }
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    rd = gen_avr_ptr(rD(ctx->opcode));
-    gen_helper_lvsl(rd, EA);
-    tcg_temp_free(EA);
-    tcg_temp_free_ptr(rd);
-}
-
-static void gen_lvsr(DisasContext *ctx)
-{
-    TCGv_ptr rd;
-    TCGv EA;
-    if (unlikely(!ctx->altivec_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VPU);
-        return;
-    }
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    rd = gen_avr_ptr(rD(ctx->opcode));
-    gen_helper_lvsr(rd, EA);
-    tcg_temp_free(EA);
-    tcg_temp_free_ptr(rd);
-}
-
 static void gen_mfvscr(DisasContext *ctx)
 {
     TCGv_i32 t;
@@ -316,6 +284,16 @@ static void glue(gen_, name)(DisasContext *ctx)            
             \
     tcg_temp_free_ptr(rd);                                              \
 }
 
+#define GEN_VXFORM_TRANS(name, opc2, opc3)                              \
+static void glue(gen_, name)(DisasContext *ctx)                         \
+{                                                                       \
+    if (unlikely(!ctx->altivec_enabled)) {                              \
+        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
+        return;                                                         \
+    }                                                                   \
+    trans_##name(ctx);                                                  \
+}
+
 #define GEN_VXFORM_ENV(name, opc2, opc3)                                \
 static void glue(gen_, name)(DisasContext *ctx)                         \
 {                                                                       \
@@ -515,6 +493,105 @@ static void gen_vmrgow(DisasContext *ctx)
     tcg_temp_free_i64(avr);
 }
 
+/*
+ * lvsl VRT,RA,RB - Load Vector for Shift Left
+ *
+ * Let the EA be the sum (rA|0)+(rB). Let sh=EA[28-31].
+ * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
+ * Bytes sh:sh+15 of X are placed into vD.
+ */
+static void trans_lvsl(DisasContext *ctx)
+{
+    int VT = rD(ctx->opcode);
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGv_i64 sh = tcg_temp_new_i64();
+    TCGv_i64 EA = tcg_temp_new();
+    int i;
+
+    /* Get sh(from description) by anding EA with 0xf. */
+    gen_addr_reg_index(ctx, EA);
+    tcg_gen_andi_i64(sh, EA, 0xfULL);
+    /*
+     * Create bytes sh:sh+7 of X(from description) and place them in
+     * higher doubleword of vD.
+     */
+    tcg_gen_addi_i64(result, sh, 7);
+    for (i = 7; i >= 1; i--) {
+        tcg_gen_shli_i64(tmp, sh, i * 8);
+        tcg_gen_or_i64(result, result, tmp);
+        tcg_gen_addi_i64(sh, sh, 1);
+    }
+    set_avr64(VT, result, true);
+    /*
+     * Create bytes sh+8:sh+15 of X(from description) and place them in
+     * lower doubleword of vD.
+     */
+    tcg_gen_addi_i64(result, sh, 8);
+    for (i = 7; i >= 1; i--) {
+        tcg_gen_addi_i64(sh, sh, 1);
+        tcg_gen_shli_i64(tmp, sh, i * 8);
+        tcg_gen_or_i64(result, result, tmp);
+    }
+    set_avr64(VT, result, false);
+
+    tcg_temp_free_i64(result);
+    tcg_temp_free_i64(tmp);
+    tcg_temp_free_i64(sh);
+    tcg_temp_free(EA);
+}
+
+/*
+ * lvsr VRT,RA,RB - Load Vector for Shift Right
+ *
+ * Let the EA be the sum (rA|0)+(rB). Let sh=EA[28-31].
+ * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
+ * Bytes (16-sh):(31-sh) of X are placed into vD.
+ */
+static void trans_lvsr(DisasContext *ctx)
+{
+    int VT = rD(ctx->opcode);
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGv_i64 sh = tcg_temp_new_i64();
+    TCGv_i64 EA = tcg_temp_new();
+    int i;
+
+    /* Get sh(from description) by anding EA with 0xf. */
+    gen_addr_reg_index(ctx, EA);
+    tcg_gen_andi_i64(sh, EA, 0xfULL);
+    /* Make (16-sh) and save it in sh. */
+    tcg_gen_subi_i64(sh, sh, 0x10ULL);
+    tcg_gen_neg_i64(sh, sh);
+    /*
+     * Create bytes (16-sh):(23-sh) of X(from description) and place them in
+     * higher doubleword of vD.
+     */
+    tcg_gen_addi_i64(result, sh, 7);
+    for (i = 7; i >= 1; i--) {
+        tcg_gen_shli_i64(tmp, sh, i * 8);
+        tcg_gen_or_i64(result, result, tmp);
+        tcg_gen_addi_i64(sh, sh, 1);
+    }
+    set_avr64(VT, result, true);
+    /*
+     * Create bytes (24-sh):(31-sh) of X(from description) and place them in
+     * lower doubleword of vD.
+     */
+    tcg_gen_addi_i64(result, sh, 8);
+    for (i = 7; i >= 1; i--) {
+        tcg_gen_addi_i64(sh, sh, 1);
+        tcg_gen_shli_i64(tmp, sh, i * 8);
+        tcg_gen_or_i64(result, result, tmp);
+    }
+    set_avr64(VT, result, false);
+
+    tcg_temp_free_i64(result);
+    tcg_temp_free_i64(tmp);
+    tcg_temp_free_i64(sh);
+    tcg_temp_free(EA);
+}
+
 GEN_VXFORM(vmuloub, 4, 0);
 GEN_VXFORM(vmulouh, 4, 1);
 GEN_VXFORM(vmulouw, 4, 2);
@@ -657,6 +734,8 @@ GEN_VXFORM_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
 GEN_VXFORM_HETRO(vextubrx, 6, 28)
 GEN_VXFORM_HETRO(vextuhrx, 6, 29)
 GEN_VXFORM_HETRO(vextuwrx, 6, 30)
+GEN_VXFORM_TRANS(lvsl, 6, 31)
+GEN_VXFORM_TRANS(lvsr, 6, 32)
 GEN_VXFORM_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207, \
                 vextuwrx, PPC_NONE, PPC2_ISA300)
 
-- 
2.7.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]