qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers


From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers
Date: Sun, 27 Sep 2009 00:41:56 +0200
User-agent: Mutt/1.5.18 (2008-05-17)

On Sun, Sep 13, 2009 at 11:00:08PM +0200, Laurent Desnogues wrote:
> Hello,
> 
> this patch is a proposal to use globals for the 8 or 16 CPU
> registers on i386 and x86_64.
> 
> I measured the improvement in the following conditions:
> 
>   - Machine:  i7 920
>   - Software:  Fedora11 x86_64 gcc 4.4.1
>   - Benchmark: SPEC2000 gcc with expr.i input
>   - User mode
>   - i386 and x86_64 hosts and targets, with and without the patch
>     (8 combinations)
> 
> The results are:
> 
> qemu-i386_on-i386          15.82user 0.05system 0:15.91elapsed
> qemu-i386_on-i386-reg      15.40user 0.02system 0:15.43elapsed
> qemu-i386_on-x86_64        15.65user 0.05system 0:15.71elapsed
> qemu-i386_on-x86_64-reg    15.11user 0.03system 0:15.15elapsed
> qemu-x86_64_on-i386        mmap: No such device or address
> qemu-x86_64_on-i386-reg    mmap: No such device or address
> qemu-x86_64_on-x86_64      18.42user 0.07system 0:18.49elapsed
> qemu-x86_64_on-x86_64-reg  13.22user 0.06system 0:13.31elapsed
> 
> Given my lack of knowledge of system QEMU, I will leave it to
> someone else to measure the speedup.

I'll try to provide benchmarks later.

> A previous version of that patch, that only handled i386 target,
> was tested by Malc who got speedup running OpenSSL on his G4.  It
> was also sent to Fabrice who asked me to send it to the mailing
> list.
> 
> The usage of globals is controlled by USE_REGS so that reviewers
> can quickly test the benefit (or the lack of it).
> 
> Comments are welcome (except for the obvious presence of //
> which is only temporary).  I need to optimize a few things once
> I'm sure the temporaries (cpu_tmp0, ...) are not used outside of
> the modified functions.  x86_64 was coded in a hurry and is
> perhaps buggy.
>

It basically looks good. Please find my comments inline.

> Laurent
> 
> Signed-off-by: Laurent Desnogues <address@hidden>

> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 335fc08..dc2fcde 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -58,10 +58,15 @@
>  
>  //#define MACRO_TEST   1
>  
> +#define USE_REGS
> +
>  /* global register indexes */
>  static TCGv_ptr cpu_env;
>  static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
>  static TCGv_i32 cpu_cc_op;
> +#ifdef USE_REGS
> +static TCGv cpu_regs[CPU_NB_REGS];
> +#endif
>  /* local temps */
>  static TCGv cpu_T[2], cpu_T3;
>  /* local register indexes (only used inside old micro ops) */
> @@ -269,70 +274,95 @@ static inline void gen_op_andl_A0_ffff(void)
>  #define REG_LH_OFFSET 4
>  #endif
>  
> +#ifdef USE_REGS
> +#ifdef TARGET_X86_64
> +/* #warning NYI */
> +#endif
> +
>  static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
>  {
> +    TCGv tmp;
> +
>      switch(ot) {
>      case OT_BYTE:
> +        tmp = tcg_temp_new();
> +        tcg_gen_andi_tl(tmp, t0, 0xff);

           tcg_gen_ext8u_tl(tmp, t0); ?

>          if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> -            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_B_OFFSET);
> +            tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
> +            tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +            //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_B_OFFSET);
>          } else {
> -            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + 
> REG_H_OFFSET);
> +            tcg_gen_shli_tl(tmp, tmp, 8);
> +            tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
> +            tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
> +            //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) 
> + REG_H_OFFSET);
>          }
> +        tcg_temp_free(tmp);
>          break;
>      case OT_WORD:
> -        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_W_OFFSET);
> +        tmp = tcg_temp_new();
> +        tcg_gen_andi_tl(tmp, t0, 0xffff);

           tcg_gen_ext16u_tl(tmp, t0); ?

> +        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> +        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +        tcg_temp_free(tmp);
> +        //tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_W_OFFSET);
>          break;
>  #ifdef TARGET_X86_64
>      case OT_LONG:
> -        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
>          /* high part of register set to zero */
> -        tcg_gen_movi_tl(cpu_tmp0, 0);
> -        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_LH_OFFSET);
> +        tcg_gen_ext32u_tl(cpu_regs[reg], t0);
> +        //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +        /* high part of register set to zero */
> +        //tcg_gen_movi_tl(cpu_tmp0, 0);
> +        //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_LH_OFFSET);
>          break;
>      default:
>      case OT_QUAD:
> -        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> +        tcg_gen_mov_tl(cpu_regs[reg], t0);
> +        //tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
>          break;
>  #else
>      default:
>      case OT_LONG:
> -        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +        tcg_gen_mov_tl(cpu_regs[reg], t0);
> +        //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
>          break;
>  #endif

        tcg_gen_ext32u_tl(cpu_regs[reg], t0) is equivalent to 
        tcg_gen_mov_tl(cpu_regs[reg], t0) if TARGET_LONG_BITS == 32, ie
        if !TARGET_X86_64. This means the OT_LONG can now be common, with
        the #ifdef only for OT_QUAD.

>      }
>  }
>  
> -static inline void gen_op_mov_reg_T0(int ot, int reg)
> -{
> -    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
> -}
> -
> -static inline void gen_op_mov_reg_T1(int ot, int reg)
> -{
> -    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
> -}
> -
>  static inline void gen_op_mov_reg_A0(int size, int reg)
>  {
> +    TCGv tmp;
> +
>      switch(size) {
>      case 0:
> -        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_W_OFFSET);
> +        tmp = tcg_temp_new();
> +        tcg_gen_andi_tl(tmp, cpu_A0, 0xffff);

           tcg_gen_ext16u_tl(tmp, cpu_A0); ?

> +        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> +        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +        tcg_temp_free(tmp);
> +        //tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_W_OFFSET);
>          break;
>  #ifdef TARGET_X86_64
>      case 1:
> -        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
>          /* high part of register set to zero */
> -        tcg_gen_movi_tl(cpu_tmp0, 0);
> -        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_LH_OFFSET);
> +        tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
> +        //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +        /* high part of register set to zero */
> +        //tcg_gen_movi_tl(cpu_tmp0, 0);
> +        //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_LH_OFFSET);
>          break;
>      default:
>      case 2:
> -        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> +        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> +        //tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
>          break;
>  #else
>      default:
>      case 1:
> -        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +        tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> +        //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
>          break;
>  #endif

        Same comment as previous to share more code between x86 and
        x86_64.

>      }
> @@ -345,59 +375,213 @@ static inline void gen_op_mov_v_reg(int ot, TCGv t0, 
> int reg)
>          if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
>              goto std_case;
>          } else {
> -            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + 
> REG_H_OFFSET);
> +            tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
> +            tcg_gen_andi_tl(t0, t0, 0xff);

               tcg_gen_ext8u_tl(t0, t0) ?

> +            //tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) 
> + REG_H_OFFSET);
>          }
>          break;
>      default:
>      std_case:
> -        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> +        tcg_gen_mov_tl(t0, cpu_regs[reg]);
> +        //tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
>          break;
>      }
>  }
>  
> -static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
> +static inline void gen_op_movl_A0_reg(int reg)
>  {
> -    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
> +    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
> +    //tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
>  }
>  
> -static inline void gen_op_movl_A0_reg(int reg)
> +static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
>  {
> -    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +    TCGv tmp;
> +
> +    switch(size) {
> +    case 0:
> +        // TODO optimize
> +        tmp = tcg_temp_new();
> +        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +        tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
> +        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> +        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +        tcg_temp_free(tmp);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +        //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_W_OFFSET);
> +        break;
> +    case 1:
> +        // TODO optimize
> +        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +        tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +#ifdef TARGET_X86_64
> +        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);

           tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0) would automatically be
           removed at compilation time if !TARGET_X86_64

> +#endif
> +        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +        //#ifdef TARGET_X86_64
> +        //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
> +        //#endif
> +        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#ifdef TARGET_X86_64
> +    case 2:
> +        tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#endif
> +    }
>  }
>  
> -static inline void gen_op_addl_A0_im(int32_t val)
> +static inline void gen_op_add_reg_T0(int size, int reg)
>  {
> -    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +    TCGv tmp;
> +
> +    switch(size) {
> +    case 0:
> +        // TODO optimize
> +        tmp = tcg_temp_new();
> +        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> +        tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
> +        tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> +        tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> +        tcg_temp_free(tmp);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> +        //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_W_OFFSET);
> +        break;
> +    case 1:
> +        // TODO optimize
> +        tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +        tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
>  #ifdef TARGET_X86_64
> -    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);

           same here

> +#endif
> +        tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> +        //#ifdef TARGET_X86_64
> +        //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
> +        //#endif
> +        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#ifdef TARGET_X86_64
> +    case 2:
> +        tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
> +        //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> +        //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
>  #endif
> +    }
>  }
>  
> -#ifdef TARGET_X86_64
> -static inline void gen_op_addq_A0_im(int64_t val)
> +static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
>  {
> -    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +    if (shift != 0)
> +        tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> +    tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +#ifdef TARGET_X86_64
> +    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +#endif
> +
> +    //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> +    //if (shift != 0)
> +    //    tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> +    //tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +    //#ifdef TARGET_X86_64
> +    //tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +    //#endif
>  }
> +
> +#else
> +
> +static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
> +{
> +    switch(ot) {
> +    case OT_BYTE:
> +        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> +            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_B_OFFSET);
> +        } else {
> +            tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + 
> REG_H_OFFSET);
> +        }
> +        break;
> +    case OT_WORD:
> +        tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_W_OFFSET);
> +        break;
> +#ifdef TARGET_X86_64
> +    case OT_LONG:
> +        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +        /* high part of register set to zero */
> +        tcg_gen_movi_tl(cpu_tmp0, 0);
> +        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_LH_OFFSET);
> +        break;
> +    default:
> +    case OT_QUAD:
> +        tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#else
> +    default:
> +    case OT_LONG:
> +        tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +        break;
>  #endif
> -    
> -static void gen_add_A0_im(DisasContext *s, int val)
> +    }
> +}
> +
> +static inline void gen_op_mov_reg_A0(int size, int reg)
>  {
> +    switch(size) {
> +    case 0:
> +        tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_W_OFFSET);
> +        break;
>  #ifdef TARGET_X86_64
> -    if (CODE64(s))
> -        gen_op_addq_A0_im(val);
> -    else
> +    case 1:
> +        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +        /* high part of register set to zero */
> +        tcg_gen_movi_tl(cpu_tmp0, 0);
> +        tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_LH_OFFSET);
> +        break;
> +    default:
> +    case 2:
> +        tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +#else
> +    default:
> +    case 1:
> +        tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +        break;
>  #endif
> -        gen_op_addl_A0_im(val);
> +    }
>  }
>  
> -static inline void gen_op_addl_T0_T1(void)
> +static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
>  {
> -    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> +    switch(ot) {
> +    case OT_BYTE:
> +        if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> +            goto std_case;
> +        } else {
> +            tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) + 
> REG_H_OFFSET);
> +        }
> +        break;
> +    default:
> +    std_case:
> +        tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> +        break;
> +    }
>  }
>  
> -static inline void gen_op_jmp_T0(void)
> +static inline void gen_op_movl_A0_reg(int reg)
>  {
> -    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
> +    tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
>  }
>  
>  static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> @@ -452,15 +636,10 @@ static inline void gen_op_add_reg_T0(int size, int reg)
>      }
>  }
>  
> -static inline void gen_op_set_cc_op(int32_t val)
> -{
> -    tcg_gen_movi_i32(cpu_cc_op, val);
> -}
> -
>  static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
>  {
>      tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> -    if (shift != 0) 
> +    if (shift != 0)
>          tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
>      tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
>  #ifdef TARGET_X86_64
> @@ -468,6 +647,63 @@ static inline void gen_op_addl_A0_reg_sN(int shift, int 
> reg)
>  #endif
>  }
>  
> +#endif
> +
> +static inline void gen_op_mov_reg_T0(int ot, int reg)
> +{
> +    gen_op_mov_reg_v(ot, reg, cpu_T[0]);
> +}
> +
> +static inline void gen_op_mov_reg_T1(int ot, int reg)
> +{
> +    gen_op_mov_reg_v(ot, reg, cpu_T[1]);
> +}
> +
> +static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
> +{
> +    gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
> +}
> +
> +static inline void gen_op_addl_A0_im(int32_t val)
> +{
> +    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +#ifdef TARGET_X86_64
> +    tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +#endif
> +}
> +
> +#ifdef TARGET_X86_64
> +static inline void gen_op_addq_A0_im(int64_t val)
> +{
> +    tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +}
> +#endif
> +    
> +static void gen_add_A0_im(DisasContext *s, int val)
> +{
> +#ifdef TARGET_X86_64
> +    if (CODE64(s))
> +        gen_op_addq_A0_im(val);
> +    else
> +#endif
> +        gen_op_addl_A0_im(val);
> +}
> +
> +static inline void gen_op_addl_T0_T1(void)
> +{
> +    tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> +}
> +
> +static inline void gen_op_jmp_T0(void)
> +{
> +    tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
> +}
> +
> +static inline void gen_op_set_cc_op(int32_t val)
> +{
> +    tcg_gen_movi_i32(cpu_cc_op, val);
> +}
> +
>  static inline void gen_op_movl_A0_seg(int reg)
>  {
>      tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) + 
> REG_L_OFFSET);
> @@ -496,13 +732,21 @@ static inline void gen_op_addq_A0_seg(int reg)
>  
>  static inline void gen_op_movq_A0_reg(int reg)
>  {
> +#ifdef USE_REGS
> +    tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
> +#else
>      tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> +#endif
>  }
>  
>  static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
>  {
> +#ifdef USE_REGS
> +    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +#else
>      tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> -    if (shift != 0) 
> +#endif
> +    if (shift != 0)
>          tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
>      tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
>  }
> @@ -701,14 +945,22 @@ static void gen_exts(int ot, TCGv reg)
>  
>  static inline void gen_op_jnz_ecx(int size, int label1)
>  {
> +#ifdef USE_REGS
> +    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
> +#else
>      tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
>      gen_extu(size + 1, cpu_tmp0);
>      tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
>  }
>  
>  static inline void gen_op_jz_ecx(int size, int label1)
>  {
> +#ifdef USE_REGS
> +    tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
> +#else
>      tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
>      gen_extu(size + 1, cpu_tmp0);
>      tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
>  }
> @@ -4834,7 +5086,11 @@ static target_ulong disas_insn(DisasContext *s, 
> target_ulong pc_start)
>                  rm = 0; /* avoid warning */
>              }
>              label1 = gen_new_label();
> +#ifdef USE_REGS
> +            tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
> +#else
>              tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
> +#endif
>              tcg_gen_sub_tl(t2, t2, t0);
>              gen_extu(ot, t2);
>              tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
> @@ -5409,7 +5665,11 @@ static target_ulong disas_insn(DisasContext *s, 
> target_ulong pc_start)
>              val = ldub_code(s->pc++);
>              tcg_gen_movi_tl(cpu_T3, val);
>          } else {
> +#ifdef USE_REGS
> +            tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
> +#else
>              tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
>          }
>          gen_shiftd_rm_T1_T3(s, ot, opreg, op);
>          break;
> @@ -6317,10 +6577,18 @@ static target_ulong disas_insn(DisasContext *s, 
> target_ulong pc_start)
>                  /* XXX: specific Intel behaviour ? */
>                  l1 = gen_new_label();
>                  gen_jcc1(s, s->cc_op, b ^ 1, l1);
> +#ifdef USE_REGS
> +                tcg_gen_mov_tl(cpu_regs[reg], t0);
> +#else
>                  tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) + 
> REG_L_OFFSET);
> +#endif
>                  gen_set_label(l1);
> +#ifdef USE_REGS
> +                tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
> +#else
>                  tcg_gen_movi_tl(cpu_tmp0, 0);
>                  tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, 
> regs[reg]) + REG_LH_OFFSET);
> +#endif
>              } else
>  #endif
>              {
> @@ -7588,6 +7856,60 @@ void optimize_flags_init(void)
>      cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
>                                      "cc_tmp");
>  
> +#ifdef USE_REGS
> +#ifdef TARGET_X86_64
> +    cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EAX]), "rax");
> +    cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_ECX]), "rcx");
> +    cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EDX]), "rdx");
> +    cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EBX]), "rbx");
> +    cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_ESP]), "rsp");
> +    cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EBP]), "rbp");
> +    cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_ESI]), "rsi");
> +    cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EDI]), "rdi");
> +    cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                         offsetof(CPUState, regs[8]), "r8");
> +    cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[9]), "r9");
> +    cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[10]), 
> "r10");
> +    cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[11]), 
> "r11");
> +    cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[12]), 
> "r12");
> +    cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[13]), 
> "r13");
> +    cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[14]), 
> "r14");
> +    cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
> +                                          offsetof(CPUState, regs[15]), 
> "r15");
> +#else
> +    cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EAX]), "eax");
> +    cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_ECX]), "ecx");
> +    cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EDX]), "edx");
> +    cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EBX]), "ebx");
> +    cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_ESP]), "esp");
> +    cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EBP]), "ebp");
> +    cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_ESI]), "esi");
> +    cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                             offsetof(CPUState, 
> regs[R_EDI]), "edi");
> +#endif
> +#endif
> +
>      /* register helpers */
>  #define GEN_HELPER 2
>  #include "helper.h"


-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
address@hidden                 http://www.aurel32.net




reply via email to

[Prev in Thread] Current Thread [Next in Thread]