[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers
From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH][RFC] x86: use globals for CPU registers
Date: Sun, 27 Sep 2009 00:41:56 +0200
User-agent: Mutt/1.5.18 (2008-05-17)
On Sun, Sep 13, 2009 at 11:00:08PM +0200, Laurent Desnogues wrote:
> Hello,
>
> this patch is a proposal to use globals for the 8 or 16 CPU
> registers on i386 and x86_64.
>
> I measured the improvement in the following conditions:
>
> - Machine: i7 920
> - Software: Fedora11 x86_64 gcc 4.4.1
> - Benchmark: SPEC2000 gcc with expr.i input
> - User mode
> - i386 and x86_64 hosts and targets, with and without the patch
> (8 combinations)
>
> The results are:
>
> qemu-i386_on-i386 15.82user 0.05system 0:15.91elapsed
> qemu-i386_on-i386-reg 15.40user 0.02system 0:15.43elapsed
> qemu-i386_on-x86_64 15.65user 0.05system 0:15.71elapsed
> qemu-i386_on-x86_64-reg 15.11user 0.03system 0:15.15elapsed
> qemu-x86_64_on-i386 mmap: No such device or address
> qemu-x86_64_on-i386-reg mmap: No such device or address
> qemu-x86_64_on-x86_64 18.42user 0.07system 0:18.49elapsed
> qemu-x86_64_on-x86_64-reg 13.22user 0.06system 0:13.31elapsed
>
> Given my lack of knowledge of system QEMU, I will leave it to
> someone else to measure the speedup.
I'll try to provide benchmarks later.
> A previous version of that patch, that only handled i386 target,
> was tested by Malc who got speedup running OpenSSL on his G4. It
> was also sent to Fabrice who asked me to send it to the mailing
> list.
>
> The usage of globals is controlled by USE_REGS so that reviewers
> can quickly test the benefit (or the lack of it).
>
> Comments are welcome (except for the obvious presence of //
> which is only temporary). I need to optimize a few things once
> I'm sure the temporaries (cpu_tmp0, ...) are not used outside of
> the modified functions. x86_64 was coded in a hurry and is
> perhaps buggy.
>
It basically looks good. Please find my comments inline.
> Laurent
>
> Signed-off-by: Laurent Desnogues <address@hidden>
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 335fc08..dc2fcde 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -58,10 +58,15 @@
>
> //#define MACRO_TEST 1
>
> +#define USE_REGS
> +
> /* global register indexes */
> static TCGv_ptr cpu_env;
> static TCGv cpu_A0, cpu_cc_src, cpu_cc_dst, cpu_cc_tmp;
> static TCGv_i32 cpu_cc_op;
> +#ifdef USE_REGS
> +static TCGv cpu_regs[CPU_NB_REGS];
> +#endif
> /* local temps */
> static TCGv cpu_T[2], cpu_T3;
> /* local register indexes (only used inside old micro ops) */
> @@ -269,70 +274,95 @@ static inline void gen_op_andl_A0_ffff(void)
> #define REG_LH_OFFSET 4
> #endif
>
> +#ifdef USE_REGS
> +#ifdef TARGET_X86_64
> +/* #warning NYI */
> +#endif
> +
> static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
> {
> + TCGv tmp;
> +
> switch(ot) {
> case OT_BYTE:
> + tmp = tcg_temp_new();
> + tcg_gen_andi_tl(tmp, t0, 0xff);
tcg_gen_ext8u_tl(tmp, t0); ?
> if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> - tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_B_OFFSET);
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_B_OFFSET);
> } else {
> - tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) +
> REG_H_OFFSET);
> + tcg_gen_shli_tl(tmp, tmp, 8);
> + tcg_gen_andi_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], ~0xff00);
> + tcg_gen_or_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], tmp);
> + //tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4])
> + REG_H_OFFSET);
> }
> + tcg_temp_free(tmp);
> break;
> case OT_WORD:
> - tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_W_OFFSET);
> + tmp = tcg_temp_new();
> + tcg_gen_andi_tl(tmp, t0, 0xffff);
tcg_gen_ext16u_tl(tmp, t0); ?
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + tcg_temp_free(tmp);
> + //tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_W_OFFSET);
> break;
> #ifdef TARGET_X86_64
> case OT_LONG:
> - tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> /* high part of register set to zero */
> - tcg_gen_movi_tl(cpu_tmp0, 0);
> - tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_LH_OFFSET);
> + tcg_gen_ext32u_tl(cpu_regs[reg], t0);
> + //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + /* high part of register set to zero */
> + //tcg_gen_movi_tl(cpu_tmp0, 0);
> + //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_LH_OFFSET);
> break;
> default:
> case OT_QUAD:
> - tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> + tcg_gen_mov_tl(cpu_regs[reg], t0);
> + //tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> break;
> #else
> default:
> case OT_LONG:
> - tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + tcg_gen_mov_tl(cpu_regs[reg], t0);
> + //tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> break;
> #endif
tcg_gen_ext32u_tl(cpu_regs[reg], t0) is equivalent to
tcg_gen_mov_tl(cpu_regs[reg], t0) if TARGET_LONG_BITS == 32, ie
if !TARGET_X86_64. This means the OT_LONG can now be common, with
the #ifdef only for OT_QUAD.
> }
> }
>
> -static inline void gen_op_mov_reg_T0(int ot, int reg)
> -{
> - gen_op_mov_reg_v(ot, reg, cpu_T[0]);
> -}
> -
> -static inline void gen_op_mov_reg_T1(int ot, int reg)
> -{
> - gen_op_mov_reg_v(ot, reg, cpu_T[1]);
> -}
> -
> static inline void gen_op_mov_reg_A0(int size, int reg)
> {
> + TCGv tmp;
> +
> switch(size) {
> case 0:
> - tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_W_OFFSET);
> + tmp = tcg_temp_new();
> + tcg_gen_andi_tl(tmp, cpu_A0, 0xffff);
tcg_gen_ext16u_tl(tmp, cpu_A0); ?
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + tcg_temp_free(tmp);
> + //tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_W_OFFSET);
> break;
> #ifdef TARGET_X86_64
> case 1:
> - tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> /* high part of register set to zero */
> - tcg_gen_movi_tl(cpu_tmp0, 0);
> - tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_LH_OFFSET);
> + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_A0);
> + //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + /* high part of register set to zero */
> + //tcg_gen_movi_tl(cpu_tmp0, 0);
> + //tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_LH_OFFSET);
> break;
> default:
> case 2:
> - tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> + tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> + //tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> break;
> #else
> default:
> case 1:
> - tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + tcg_gen_mov_tl(cpu_regs[reg], cpu_A0);
> + //tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> break;
> #endif
Same comment as above: the OT_LONG/case 1 path can be made common so
that more code is shared between the i386 and x86_64 targets.
> }
> @@ -345,59 +375,213 @@ static inline void gen_op_mov_v_reg(int ot, TCGv t0,
> int reg)
> if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> goto std_case;
> } else {
> - tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) +
> REG_H_OFFSET);
> + tcg_gen_shri_tl(t0, cpu_regs[reg - 4], 8);
> + tcg_gen_andi_tl(t0, t0, 0xff);
tcg_gen_ext8u_tl(t0, t0) ?
> + //tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4])
> + REG_H_OFFSET);
> }
> break;
> default:
> std_case:
> - tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> + tcg_gen_mov_tl(t0, cpu_regs[reg]);
> + //tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> break;
> }
> }
>
> -static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
> +static inline void gen_op_movl_A0_reg(int reg)
> {
> - gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
> + tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
> + //tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> }
>
> -static inline void gen_op_movl_A0_reg(int reg)
> +static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> {
> - tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + TCGv tmp;
> +
> + switch(size) {
> + case 0:
> + // TODO optimize
> + tmp = tcg_temp_new();
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> + tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + tcg_temp_free(tmp);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> + //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_W_OFFSET);
> + break;
> + case 1:
> + // TODO optimize
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> +#ifdef TARGET_X86_64
> + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
tcg_gen_ext32u_tl(cpu_tmp0, cpu_tmp0) would automatically be reduced to
a plain move (and optimized away) at translation time if !TARGET_X86_64,
so the #ifdef would not be needed.
> +#endif
> + tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> + //#ifdef TARGET_X86_64
> + //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
> + //#endif
> + //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#ifdef TARGET_X86_64
> + case 2:
> + tcg_gen_addi_tl(cpu_regs[reg], cpu_regs[reg], val);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_addi_tl(cpu_tmp0, cpu_tmp0, val);
> + //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#endif
> + }
> }
>
> -static inline void gen_op_addl_A0_im(int32_t val)
> +static inline void gen_op_add_reg_T0(int size, int reg)
> {
> - tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> + TCGv tmp;
> +
> + switch(size) {
> + case 0:
> + // TODO optimize
> + tmp = tcg_temp_new();
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> + tcg_gen_andi_tl(tmp, cpu_tmp0, 0xffff);
> + tcg_gen_andi_tl(cpu_regs[reg], cpu_regs[reg], ~0xffff);
> + tcg_gen_or_tl(cpu_regs[reg], cpu_regs[reg], tmp);
> + tcg_temp_free(tmp);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> + //tcg_gen_st16_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_W_OFFSET);
> + break;
> + case 1:
> + // TODO optimize
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> #ifdef TARGET_X86_64
> - tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> + tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
same here
> +#endif
> + tcg_gen_mov_tl(cpu_regs[reg], cpu_tmp0);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> + //#ifdef TARGET_X86_64
> + //tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffff);
> + //#endif
> + //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#ifdef TARGET_X86_64
> + case 2:
> + tcg_gen_add_tl(cpu_regs[reg], cpu_regs[reg], cpu_T[0]);
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //tcg_gen_add_tl(cpu_tmp0, cpu_tmp0, cpu_T[0]);
> + //tcg_gen_st_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> #endif
> + }
> }
>
> -#ifdef TARGET_X86_64
> -static inline void gen_op_addq_A0_im(int64_t val)
> +static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
> {
> - tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> + if (shift != 0)
> + tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> + tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> +#ifdef TARGET_X86_64
> + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +#endif
> +
> + //tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> + //if (shift != 0)
> + // tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> + //tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> + //#ifdef TARGET_X86_64
> + //tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> + //#endif
> }
> +
> +#else
> +
> +static inline void gen_op_mov_reg_v(int ot, int reg, TCGv t0)
> +{
> + switch(ot) {
> + case OT_BYTE:
> + if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> + tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_B_OFFSET);
> + } else {
> + tcg_gen_st8_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) +
> REG_H_OFFSET);
> + }
> + break;
> + case OT_WORD:
> + tcg_gen_st16_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_W_OFFSET);
> + break;
> +#ifdef TARGET_X86_64
> + case OT_LONG:
> + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + /* high part of register set to zero */
> + tcg_gen_movi_tl(cpu_tmp0, 0);
> + tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_LH_OFFSET);
> + break;
> + default:
> + case OT_QUAD:
> + tcg_gen_st_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#else
> + default:
> + case OT_LONG:
> + tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + break;
> #endif
> -
> -static void gen_add_A0_im(DisasContext *s, int val)
> + }
> +}
> +
> +static inline void gen_op_mov_reg_A0(int size, int reg)
> {
> + switch(size) {
> + case 0:
> + tcg_gen_st16_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_W_OFFSET);
> + break;
> #ifdef TARGET_X86_64
> - if (CODE64(s))
> - gen_op_addq_A0_im(val);
> - else
> + case 1:
> + tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + /* high part of register set to zero */
> + tcg_gen_movi_tl(cpu_tmp0, 0);
> + tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_LH_OFFSET);
> + break;
> + default:
> + case 2:
> + tcg_gen_st_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> +#else
> + default:
> + case 1:
> + tcg_gen_st32_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> + break;
> #endif
> - gen_op_addl_A0_im(val);
> + }
> }
>
> -static inline void gen_op_addl_T0_T1(void)
> +static inline void gen_op_mov_v_reg(int ot, TCGv t0, int reg)
> {
> - tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> + switch(ot) {
> + case OT_BYTE:
> + if (reg < 4 X86_64_DEF( || reg >= 8 || x86_64_hregs)) {
> + goto std_case;
> + } else {
> + tcg_gen_ld8u_tl(t0, cpu_env, offsetof(CPUState, regs[reg - 4]) +
> REG_H_OFFSET);
> + }
> + break;
> + default:
> + std_case:
> + tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUState, regs[reg]));
> + break;
> + }
> }
>
> -static inline void gen_op_jmp_T0(void)
> +static inline void gen_op_movl_A0_reg(int reg)
> {
> - tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
> + tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> }
>
> static inline void gen_op_add_reg_im(int size, int reg, int32_t val)
> @@ -452,15 +636,10 @@ static inline void gen_op_add_reg_T0(int size, int reg)
> }
> }
>
> -static inline void gen_op_set_cc_op(int32_t val)
> -{
> - tcg_gen_movi_i32(cpu_cc_op, val);
> -}
> -
> static inline void gen_op_addl_A0_reg_sN(int shift, int reg)
> {
> tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> - if (shift != 0)
> + if (shift != 0)
> tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> #ifdef TARGET_X86_64
> @@ -468,6 +647,63 @@ static inline void gen_op_addl_A0_reg_sN(int shift, int
> reg)
> #endif
> }
>
> +#endif
> +
> +static inline void gen_op_mov_reg_T0(int ot, int reg)
> +{
> + gen_op_mov_reg_v(ot, reg, cpu_T[0]);
> +}
> +
> +static inline void gen_op_mov_reg_T1(int ot, int reg)
> +{
> + gen_op_mov_reg_v(ot, reg, cpu_T[1]);
> +}
> +
> +static inline void gen_op_mov_TN_reg(int ot, int t_index, int reg)
> +{
> + gen_op_mov_v_reg(ot, cpu_T[t_index], reg);
> +}
> +
> +static inline void gen_op_addl_A0_im(int32_t val)
> +{
> + tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +#ifdef TARGET_X86_64
> + tcg_gen_andi_tl(cpu_A0, cpu_A0, 0xffffffff);
> +#endif
> +}
> +
> +#ifdef TARGET_X86_64
> +static inline void gen_op_addq_A0_im(int64_t val)
> +{
> + tcg_gen_addi_tl(cpu_A0, cpu_A0, val);
> +}
> +#endif
> +
> +static void gen_add_A0_im(DisasContext *s, int val)
> +{
> +#ifdef TARGET_X86_64
> + if (CODE64(s))
> + gen_op_addq_A0_im(val);
> + else
> +#endif
> + gen_op_addl_A0_im(val);
> +}
> +
> +static inline void gen_op_addl_T0_T1(void)
> +{
> + tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> +}
> +
> +static inline void gen_op_jmp_T0(void)
> +{
> + tcg_gen_st_tl(cpu_T[0], cpu_env, offsetof(CPUState, eip));
> +}
> +
> +static inline void gen_op_set_cc_op(int32_t val)
> +{
> + tcg_gen_movi_i32(cpu_cc_op, val);
> +}
> +
> static inline void gen_op_movl_A0_seg(int reg)
> {
> tcg_gen_ld32u_tl(cpu_A0, cpu_env, offsetof(CPUState, segs[reg].base) +
> REG_L_OFFSET);
> @@ -496,13 +732,21 @@ static inline void gen_op_addq_A0_seg(int reg)
>
> static inline void gen_op_movq_A0_reg(int reg)
> {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_A0, cpu_regs[reg]);
> +#else
> tcg_gen_ld_tl(cpu_A0, cpu_env, offsetof(CPUState, regs[reg]));
> +#endif
> }
>
> static inline void gen_op_addq_A0_reg_sN(int shift, int reg)
> {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[reg]);
> +#else
> tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[reg]));
> - if (shift != 0)
> +#endif
> + if (shift != 0)
> tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, shift);
> tcg_gen_add_tl(cpu_A0, cpu_A0, cpu_tmp0);
> }
> @@ -701,14 +945,22 @@ static void gen_exts(int ot, TCGv reg)
>
> static inline void gen_op_jnz_ecx(int size, int label1)
> {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
> +#else
> tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
> gen_extu(size + 1, cpu_tmp0);
> tcg_gen_brcondi_tl(TCG_COND_NE, cpu_tmp0, 0, label1);
> }
>
> static inline void gen_op_jz_ecx(int size, int label1)
> {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_tmp0, cpu_regs[R_ECX]);
> +#else
> tcg_gen_ld_tl(cpu_tmp0, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
> gen_extu(size + 1, cpu_tmp0);
> tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, label1);
> }
> @@ -4834,7 +5086,11 @@ static target_ulong disas_insn(DisasContext *s,
> target_ulong pc_start)
> rm = 0; /* avoid warning */
> }
> label1 = gen_new_label();
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(t2, cpu_regs[R_EAX]);
> +#else
> tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUState, regs[R_EAX]));
> +#endif
> tcg_gen_sub_tl(t2, t2, t0);
> gen_extu(ot, t2);
> tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, label1);
> @@ -5409,7 +5665,11 @@ static target_ulong disas_insn(DisasContext *s,
> target_ulong pc_start)
> val = ldub_code(s->pc++);
> tcg_gen_movi_tl(cpu_T3, val);
> } else {
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_T3, cpu_regs[R_ECX]);
> +#else
> tcg_gen_ld_tl(cpu_T3, cpu_env, offsetof(CPUState, regs[R_ECX]));
> +#endif
> }
> gen_shiftd_rm_T1_T3(s, ot, opreg, op);
> break;
> @@ -6317,10 +6577,18 @@ static target_ulong disas_insn(DisasContext *s,
> target_ulong pc_start)
> /* XXX: specific Intel behaviour ? */
> l1 = gen_new_label();
> gen_jcc1(s, s->cc_op, b ^ 1, l1);
> +#ifdef USE_REGS
> + tcg_gen_mov_tl(cpu_regs[reg], t0);
> +#else
> tcg_gen_st32_tl(t0, cpu_env, offsetof(CPUState, regs[reg]) +
> REG_L_OFFSET);
> +#endif
> gen_set_label(l1);
> +#ifdef USE_REGS
> + tcg_gen_ext32u_tl(cpu_regs[reg], cpu_regs[reg]);
> +#else
> tcg_gen_movi_tl(cpu_tmp0, 0);
> tcg_gen_st32_tl(cpu_tmp0, cpu_env, offsetof(CPUState,
> regs[reg]) + REG_LH_OFFSET);
> +#endif
> } else
> #endif
> {
> @@ -7588,6 +7856,60 @@ void optimize_flags_init(void)
> cpu_cc_tmp = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, cc_tmp),
> "cc_tmp");
>
> +#ifdef USE_REGS
> +#ifdef TARGET_X86_64
> + cpu_regs[R_EAX] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EAX]), "rax");
> + cpu_regs[R_ECX] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_ECX]), "rcx");
> + cpu_regs[R_EDX] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EDX]), "rdx");
> + cpu_regs[R_EBX] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EBX]), "rbx");
> + cpu_regs[R_ESP] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_ESP]), "rsp");
> + cpu_regs[R_EBP] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EBP]), "rbp");
> + cpu_regs[R_ESI] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_ESI]), "rsi");
> + cpu_regs[R_EDI] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EDI]), "rdi");
> + cpu_regs[8] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[8]), "r8");
> + cpu_regs[9] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[9]), "r9");
> + cpu_regs[10] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[10]),
> "r10");
> + cpu_regs[11] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[11]),
> "r11");
> + cpu_regs[12] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[12]),
> "r12");
> + cpu_regs[13] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[13]),
> "r13");
> + cpu_regs[14] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[14]),
> "r14");
> + cpu_regs[15] = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUState, regs[15]),
> "r15");
> +#else
> + cpu_regs[R_EAX] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EAX]), "eax");
> + cpu_regs[R_ECX] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_ECX]), "ecx");
> + cpu_regs[R_EDX] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EDX]), "edx");
> + cpu_regs[R_EBX] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EBX]), "ebx");
> + cpu_regs[R_ESP] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_ESP]), "esp");
> + cpu_regs[R_EBP] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EBP]), "ebp");
> + cpu_regs[R_ESI] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_ESI]), "esi");
> + cpu_regs[R_EDI] = tcg_global_mem_new_i32(TCG_AREG0,
> + offsetof(CPUState,
> regs[R_EDI]), "edi");
> +#endif
> +#endif
> +
> /* register helpers */
> #define GEN_HELPER 2
> #include "helper.h"
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net