[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations.
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations. |
Date: |
Fri, 21 May 2010 11:38:49 +0200 |
User-agent: |
Mutt/1.5.20 (2009-06-14) |
On Wed, Apr 14, 2010 at 10:16:33AM -0700, Richard Henderson wrote:
> Add more OPC values, and tgen_arithr. Use the latter throughout.
>
> Note that normal reg/reg arithmetic now uses the Gv,Ev opcode form
> instead of the Ev,Gv opcode form used previously. Both forms
> disassemble properly, and so there's no visible change when diffing
> log files before and after the change. This change makes the operand
> ordering within the output routines more natural, and avoids the need
> to define an OPC_ARITH_EvGv since a read-modify-write with memory is
> not needed within TCG.
>
> Signed-off-by: Richard Henderson <address@hidden>
Acked-by: Aurelien Jarno <address@hidden>
> ---
> tcg/i386/tcg-target.c | 78
> ++++++++++++++++++++++++++++++-------------------
> 1 files changed, 48 insertions(+), 30 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index df1bdfc..b4e8e74 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -165,7 +165,12 @@ static inline int tcg_target_const_match(tcg_target_long
> val,
>
> #define OPC_ARITH_EvIz (0x81)
> #define OPC_ARITH_EvIb (0x83)
> +#define OPC_ARITH_GvEv (0x03) /* ... plus (ARITH_FOO << 3) */
> +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
> +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
> +#define OPC_DEC_r32 (0x48)
> #define OPC_BSWAP (0xc8 | P_EXT)
> +#define OPC_INC_r32 (0x40)
> #define OPC_JCC_long (0x80 | P_EXT) /* ... plus condition code */
> #define OPC_JCC_short (0x70) /* ... plus condition code */
> #define OPC_JMP_long (0xe9)
> @@ -180,6 +185,7 @@ static inline int tcg_target_const_match(tcg_target_long
> val,
> #define OPC_SHIFT_1 (0xd1)
> #define OPC_SHIFT_Ib (0xc1)
> #define OPC_SHIFT_cl (0xd3)
> +#define OPC_TESTL (0x85)
>
> /* Group 1 opcode extensions for 0x80-0x83. */
> #define ARITH_ADD 0
> @@ -280,6 +286,12 @@ static inline void tcg_out_modrm_offset(TCGContext *s,
> int opc, int r, int rm,
> }
> }
>
> +/* Generate dest op= src. Uses the same ARITH_* codes as tgen_arithi. */
> +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
> +{
> + tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
> +}
> +
> static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
> {
> if (arg != ret) {
> @@ -291,8 +303,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType
> type,
> int ret, int32_t arg)
> {
> if (arg == 0) {
> - /* xor r0,r0 */
> - tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
> + tgen_arithr(s, ARITH_XOR, ret, ret);
> } else {
> tcg_out8(s, 0xb8 + ret);
> tcg_out32(s, arg);
> @@ -374,14 +385,15 @@ static inline void tcg_out_rolw_8(TCGContext *s, int
> reg)
> tcg_out_shifti(s, SHIFT_ROL, reg, 8);
> }
>
> -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val,
> int cf)
> +static inline void tgen_arithi(TCGContext *s, int c, int r0,
> + int32_t val, int cf)
> {
> - if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val ==
> -1))) {
> - /* inc */
> - tcg_out_opc(s, 0x40 + r0);
> - } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB &&
> val == 1))) {
> - /* dec */
> - tcg_out_opc(s, 0x48 + r0);
> + /* ??? While INC/DEC are 2 bytes shorter than ADDL $1, they also induce
> + partial flags update stalls on Pentium4 and are not recommended
> + by current Intel optimization manuals. */
> + if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val ==
> -1)) {
> + int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
> + tcg_out_opc(s, opc + r0);
> } else if (val == (int8_t)val) {
> tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
> tcg_out8(s, val);
> @@ -454,12 +466,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1,
> TCGArg arg2,
> if (const_arg2) {
> if (arg2 == 0) {
> /* test r, r */
> - tcg_out_modrm(s, 0x85, arg1, arg1);
> + tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
> } else {
> tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
> }
> } else {
> - tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
> + tgen_arithr(s, ARITH_CMP, arg1, arg2);
> }
> }
>
> @@ -674,7 +686,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg
> *args,
> tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
>
> /* cmp 0(r1), r0 */
> - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> + tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>
> tcg_out_mov(s, r0, addr_reg);
>
> @@ -690,7 +702,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg
> *args,
> s->code_ptr++;
>
> /* cmp 4(r1), addr_reg2 */
> - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>
> /* je label1 */
> tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -749,7 +761,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg
> *args,
> *label1_ptr = s->code_ptr - label1_ptr - 1;
>
> /* add x(r1), r0 */
> - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) -
> + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> + offsetof(CPUTLBEntry, addend) -
> offsetof(CPUTLBEntry, addr_read));
> #else
> r0 = addr_reg;
> @@ -864,7 +877,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg
> *args,
> tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
>
> /* cmp 0(r1), r0 */
> - tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> + tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>
> tcg_out_mov(s, r0, addr_reg);
>
> @@ -880,7 +893,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg
> *args,
> s->code_ptr++;
>
> /* cmp 4(r1), addr_reg2 */
> - tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> + tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>
> /* je label1 */
> tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -961,7 +974,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg
> *args,
> *label1_ptr = s->code_ptr - label1_ptr - 1;
>
> /* add x(r1), r0 */
> - tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) -
> + tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> + offsetof(CPUTLBEntry, addend) -
> offsetof(CPUTLBEntry, addr_write));
> #else
> r0 = addr_reg;
> @@ -1113,7 +1127,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode
> opc,
> if (const_args[2]) {
> tgen_arithi(s, c, args[0], args[2], 0);
> } else {
> - tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
> + tgen_arithr(s, c, args[0], args[2]);
> }
> break;
> case INDEX_op_mul_i32:
> @@ -1163,24 +1177,28 @@ static inline void tcg_out_op(TCGContext *s,
> TCGOpcode opc,
> goto gen_shift32;
>
> case INDEX_op_add2_i32:
> - if (const_args[4])
> + if (const_args[4]) {
> tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
> - else
> - tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
> - if (const_args[5])
> + } else {
> + tgen_arithr(s, ARITH_ADD, args[0], args[4]);
> + }
> + if (const_args[5]) {
> tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
> - else
> - tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
> + } else {
> + tgen_arithr(s, ARITH_ADC, args[1], args[5]);
> + }
> break;
> case INDEX_op_sub2_i32:
> - if (const_args[4])
> + if (const_args[4]) {
> tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
> - else
> - tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
> - if (const_args[5])
> + } else {
> + tgen_arithr(s, ARITH_SUB, args[0], args[4]);
> + }
> + if (const_args[5]) {
> tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
> - else
> - tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
> + } else {
> + tgen_arithr(s, ARITH_SBB, args[1], args[5]);
> + }
> break;
> case INDEX_op_brcond_i32:
> tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
> --
> 1.6.6.1
>
>
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations.,
Aurelien Jarno <=