[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 05/21] tcg-i386: Tidy bswap operations.
From: Richard Henderson
Subject: [Qemu-devel] [PATCH 05/21] tcg-i386: Tidy bswap operations.
Date: Tue, 13 Apr 2010 16:33:59 -0700
Define OPC_BSWAP. Factor opcode emission to separate functions.
Use bswap+shift to implement 16-bit swap instead of a rolw; this
gets the proper zero-extension required by INDEX_op_bswap16_i32.
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/i386/tcg-target.c | 53 +++++++++++++++++++++++++------------------------
1 files changed, 27 insertions(+), 26 deletions(-)
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 75b9915..0bafd00 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -163,6 +163,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define P_EXT 0x100 /* 0x0f opcode prefix */
+#define OPC_BSWAP (0xc8 | P_EXT)
#define OPC_MOVZBL (0xb6 | P_EXT)
#define OPC_MOVZWL (0xb7 | P_EXT)
#define OPC_MOVSBL (0xbe | P_EXT)
@@ -339,6 +340,22 @@ static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
tcg_out_modrm(s, OPC_MOVSWL, dest, src);
}
+static inline void tcg_out_bswap32(TCGContext *s, int reg)
+{
+ tcg_out_opc(s, OPC_BSWAP + reg);
+}
+
+static inline void tcg_out_bswap16(TCGContext *s, int reg, int sign)
+{
+ /* This swap+shift combination guarantees that the high part contains
+ the sign or zero extension required. It also doesn't suffer the
+ problem of partial register stalls that using rolw does. */
+ tcg_out_bswap32(s, reg);
+ /* shr $16, dest */
+ tcg_out_modrm(s, 0xc1, (sign ? SHIFT_SAR : SHIFT_SHR), reg);
+ tcg_out8(s, 16);
+}
+
static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
{
if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
@@ -745,31 +762,21 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
/* movzwl */
tcg_out_modrm_offset(s, OPC_MOVZWL, data_reg, r0, GUEST_BASE);
if (bswap) {
- /* rolw $8, data_reg */
- tcg_out8(s, 0x66);
- tcg_out_modrm(s, 0xc1, 0, data_reg);
- tcg_out8(s, 8);
+ tcg_out_bswap16(s, data_reg, 0);
}
break;
case 1 | 4:
/* movswl */
tcg_out_modrm_offset(s, OPC_MOVSWL, data_reg, r0, GUEST_BASE);
if (bswap) {
- /* rolw $8, data_reg */
- tcg_out8(s, 0x66);
- tcg_out_modrm(s, 0xc1, 0, data_reg);
- tcg_out8(s, 8);
-
- /* movswl data_reg, data_reg */
- tcg_out_modrm(s, OPC_MOVSWL, data_reg, data_reg);
+ tcg_out_bswap16(s, data_reg, 1);
}
break;
case 2:
/* movl (r0), data_reg */
tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE);
if (bswap) {
- /* bswap */
- tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
+ tcg_out_bswap32(s, data_reg);
}
break;
case 3:
@@ -786,11 +793,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE + 4);
} else {
tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE + 4);
- tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
+ tcg_out_bswap32(s, data_reg);
tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE);
- /* bswap */
- tcg_out_opc(s, (0xc8 + data_reg2) | P_EXT);
+ tcg_out_bswap32(s, data_reg2);
}
break;
default:
@@ -982,8 +988,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
case 2:
if (bswap) {
tcg_out_mov(s, r1, data_reg);
- /* bswap data_reg */
- tcg_out_opc(s, (0xc8 + r1) | P_EXT);
+ tcg_out_bswap32(s, r1);
data_reg = r1;
}
/* movl */
@@ -992,12 +997,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
case 3:
if (bswap) {
tcg_out_mov(s, r1, data_reg2);
- /* bswap data_reg */
- tcg_out_opc(s, (0xc8 + r1) | P_EXT);
+ tcg_out_bswap32(s, r1);
tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE);
tcg_out_mov(s, r1, data_reg);
- /* bswap data_reg */
- tcg_out_opc(s, (0xc8 + r1) | P_EXT);
+ tcg_out_bswap32(s, r1);
tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE + 4);
} else {
tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
@@ -1195,12 +1198,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_bswap16_i32:
- tcg_out8(s, 0x66);
- tcg_out_modrm(s, 0xc1, SHIFT_ROL, args[0]);
- tcg_out8(s, 8);
+ tcg_out_bswap16(s, args[0], 0);
break;
case INDEX_op_bswap32_i32:
- tcg_out_opc(s, (0xc8 + args[0]) | P_EXT);
+ tcg_out_bswap32(s, args[0]);
break;
case INDEX_op_neg_i32:
--
1.6.2.5
[Qemu-devel] [PATCH 10/21] tcg-i386: Tidy immediate arithmetic operations., Richard Henderson, 2010/04/14
[Qemu-devel] [PATCH 06/21] tcg-i386: Tidy shift operations., Richard Henderson, 2010/04/14
[Qemu-devel] [PATCH 11/21] tcg-i386: Tidy non-immediate arithmetic operations., Richard Henderson, 2010/04/14
[Qemu-devel] [PATCH 09/21] tcg-i386: Tidy jumps., Richard Henderson, 2010/04/14
[Qemu-devel] [PATCH 08/21] tcg-i386: Eliminate extra move from qemu_ld64., Richard Henderson, 2010/04/14
[Qemu-devel] [PATCH 07/21] tcg-i386: Tidy move operations., Richard Henderson, 2010/04/14