qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations.


From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH 11/22] tcg-i386: Tidy non-immediate arithmetic operations.
Date: Fri, 21 May 2010 11:38:49 +0200
User-agent: Mutt/1.5.20 (2009-06-14)

On Wed, Apr 14, 2010 at 10:16:33AM -0700, Richard Henderson wrote:
> Add more OPC values, and tgen_arithr.  Use the latter throughout.
> 
> Note that normal reg/reg arithmetic now uses the Gv,Ev opcode form
> instead of the Ev,Gv opcode form used previously.  Both forms
> disassemble properly, and so there's no visible change when diffing
> log files before and after the change.  This change makes the operand
> ordering within the output routines more natural, and avoids the need
> to define an OPC_ARITH_EvGv since a read-modify-write with memory is
> not needed within TCG.
> 
> Signed-off-by: Richard Henderson <address@hidden>

Acked-by: Aurelien Jarno <address@hidden>

> ---
>  tcg/i386/tcg-target.c |   78 ++++++++++++++++++++++++++++++-------------------
>  1 files changed, 48 insertions(+), 30 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index df1bdfc..b4e8e74 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -165,7 +165,12 @@ static inline int tcg_target_const_match(tcg_target_long 
> val,
>  
>  #define OPC_ARITH_EvIz       (0x81)
>  #define OPC_ARITH_EvIb       (0x83)
> +#define OPC_ARITH_GvEv       (0x03)          /* ... plus (ARITH_FOO << 3) */
> +#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
> +#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
> +#define OPC_DEC_r32  (0x48)
>  #define OPC_BSWAP    (0xc8 | P_EXT)
> +#define OPC_INC_r32  (0x40)
>  #define OPC_JCC_long (0x80 | P_EXT)  /* ... plus condition code */
>  #define OPC_JCC_short        (0x70)          /* ... plus condition code */
>  #define OPC_JMP_long (0xe9)
> @@ -180,6 +185,7 @@ static inline int tcg_target_const_match(tcg_target_long 
> val,
>  #define OPC_SHIFT_1  (0xd1)
>  #define OPC_SHIFT_Ib (0xc1)
>  #define OPC_SHIFT_cl (0xd3)
> +#define OPC_TESTL    (0x85)
>  
>  /* Group 1 opcode extensions for 0x80-0x83.  */
>  #define ARITH_ADD 0
> @@ -280,6 +286,12 @@ static inline void tcg_out_modrm_offset(TCGContext *s, 
> int opc, int r, int rm,
>      }
>  }
>  
> +/* Generate dest op= src.  Uses the same ARITH_* codes as tgen_arithi.  */
> +static inline void tgen_arithr(TCGContext *s, int subop, int dest, int src)
> +{
> +    tcg_out_modrm(s, OPC_ARITH_GvEv + (subop << 3), dest, src);
> +}
> +
>  static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
>  {
>      if (arg != ret) {
> @@ -291,8 +303,7 @@ static inline void tcg_out_movi(TCGContext *s, TCGType 
> type,
>                                  int ret, int32_t arg)
>  {
>      if (arg == 0) {
> -        /* xor r0,r0 */
> -        tcg_out_modrm(s, 0x01 | (ARITH_XOR << 3), ret, ret);
> +        tgen_arithr(s, ARITH_XOR, ret, ret);
>      } else {
>          tcg_out8(s, 0xb8 + ret);
>          tcg_out32(s, arg);
> @@ -374,14 +385,15 @@ static inline void tcg_out_rolw_8(TCGContext *s, int 
> reg)
>      tcg_out_shifti(s, SHIFT_ROL, reg, 8);
>  }
>  
> -static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, 
> int cf)
> +static inline void tgen_arithi(TCGContext *s, int c, int r0,
> +                               int32_t val, int cf)
>  {
> -    if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == 
> -1))) {
> -        /* inc */
> -        tcg_out_opc(s, 0x40 + r0);
> -    } else if (!cf && ((c == ARITH_ADD && val == -1) || (c == ARITH_SUB && 
> val == 1))) {
> -        /* dec */
> -        tcg_out_opc(s, 0x48 + r0);
> +    /* ??? While INC is 2 bytes shorter than ADDL $1, they also induce
> +       partial flags update stalls on Pentium4 and are not recommended
> +       by current Intel optimization manuals.  */
> +    if (!cf && (c == ARITH_ADD || c == ARITH_SUB) && (val == 1 || val == -1)) {
> +        int opc = ((c == ARITH_ADD) ^ (val < 0) ? OPC_INC_r32 : OPC_DEC_r32);
> +        tcg_out_opc(s, opc + r0);
>      } else if (val == (int8_t)val) {
>          tcg_out_modrm(s, OPC_ARITH_EvIb, c, r0);
>          tcg_out8(s, val);
> @@ -454,12 +466,12 @@ static void tcg_out_cmp(TCGContext *s, TCGArg arg1, 
> TCGArg arg2,
>      if (const_arg2) {
>          if (arg2 == 0) {
>              /* test r, r */
> -            tcg_out_modrm(s, 0x85, arg1, arg1);
> +            tcg_out_modrm(s, OPC_TESTL, arg1, arg1);
>          } else {
>              tgen_arithi(s, ARITH_CMP, arg1, arg2, 0);
>          }
>      } else {
> -        tcg_out_modrm(s, 0x01 | (ARITH_CMP << 3), arg2, arg1);
> +        tgen_arithr(s, ARITH_CMP, arg1, arg2);
>      }
>  }
>  
> @@ -674,7 +686,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>      tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_read));
>  
>      /* cmp 0(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>      
>      tcg_out_mov(s, r0, addr_reg);
>      
> @@ -690,7 +702,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>      s->code_ptr++;
>      
>      /* cmp 4(r1), addr_reg2 */
> -    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>  
>      /* je label1 */
>      tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -749,7 +761,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>      *label1_ptr = s->code_ptr - label1_ptr - 1;
>  
>      /* add x(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
> +    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> +                         offsetof(CPUTLBEntry, addend) - 
>                           offsetof(CPUTLBEntry, addr_read));
>  #else
>      r0 = addr_reg;
> @@ -864,7 +877,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>      tcg_out32(s, offsetof(CPUState, tlb_table[mem_index][0].addr_write));
>  
>      /* cmp 0(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x3b, r0, r1, 0);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, r0, r1, 0);
>      
>      tcg_out_mov(s, r0, addr_reg);
>      
> @@ -880,7 +893,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>      s->code_ptr++;
>      
>      /* cmp 4(r1), addr_reg2 */
> -    tcg_out_modrm_offset(s, 0x3b, addr_reg2, r1, 4);
> +    tcg_out_modrm_offset(s, OPC_CMP_GvEv, addr_reg2, r1, 4);
>  
>      /* je label1 */
>      tcg_out8(s, OPC_JCC_short + JCC_JE);
> @@ -961,7 +974,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>      *label1_ptr = s->code_ptr - label1_ptr - 1;
>  
>      /* add x(r1), r0 */
> -    tcg_out_modrm_offset(s, 0x03, r0, r1, offsetof(CPUTLBEntry, addend) - 
> +    tcg_out_modrm_offset(s, OPC_ADD_GvEv, r0, r1,
> +                         offsetof(CPUTLBEntry, addend) - 
>                           offsetof(CPUTLBEntry, addr_write));
>  #else
>      r0 = addr_reg;
> @@ -1113,7 +1127,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
> opc,
>          if (const_args[2]) {
>              tgen_arithi(s, c, args[0], args[2], 0);
>          } else {
> -            tcg_out_modrm(s, 0x01 | (c << 3), args[2], args[0]);
> +            tgen_arithr(s, c, args[0], args[2]);
>          }
>          break;
>      case INDEX_op_mul_i32:
> @@ -1163,24 +1177,28 @@ static inline void tcg_out_op(TCGContext *s, 
> TCGOpcode opc,
>          goto gen_shift32;
>  
>      case INDEX_op_add2_i32:
> -        if (const_args[4]) 
> +        if (const_args[4]) {
>              tgen_arithi(s, ARITH_ADD, args[0], args[4], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_ADD << 3), args[4], args[0]);
> -        if (const_args[5]) 
> +        } else {
> +            tgen_arithr(s, ARITH_ADD, args[0], args[4]);
> +        }
> +        if (const_args[5]) {
>              tgen_arithi(s, ARITH_ADC, args[1], args[5], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_ADC << 3), args[5], args[1]);
> +        } else {
> +            tgen_arithr(s, ARITH_ADC, args[1], args[5]);
> +        }
>          break;
>      case INDEX_op_sub2_i32:
> -        if (const_args[4]) 
> +        if (const_args[4]) {
>              tgen_arithi(s, ARITH_SUB, args[0], args[4], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_SUB << 3), args[4], args[0]);
> -        if (const_args[5]) 
> +        } else {
> +            tgen_arithr(s, ARITH_SUB, args[0], args[4]);
> +        }
> +        if (const_args[5]) {
>              tgen_arithi(s, ARITH_SBB, args[1], args[5], 1);
> -        else
> -            tcg_out_modrm(s, 0x01 | (ARITH_SBB << 3), args[5], args[1]);
> +        } else {
> +            tgen_arithr(s, ARITH_SBB, args[1], args[5]);
> +        }
>          break;
>      case INDEX_op_brcond_i32:
>          tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
> -- 
> 1.6.6.1
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
address@hidden                 http://www.aurel32.net



reply via email to

[Prev in Thread] Current Thread [Next in Thread]