qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers


From: Tom Musta
Subject: Re: [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers
Date: Thu, 18 Sep 2014 15:25:59 -0500
User-agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:24.0) Gecko/20100101 Thunderbird/24.6.0

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
> This makes comparisons much smaller and faster.  The speedup is
> approximately 10% on user-mode emulation on x86 host, 3-4% on PPC.
> 
> Note that CRF_* constants are flipped to match PowerPC's big
> bit-endianness.  Previously, the CR register was effectively stored
> in mixed endianness, so now there is less indirection going on.
> 
> Signed-off-by: Paolo Bonzini <address@hidden>
> ---
>       v1->v2: fixed all issues reported by Tom, notably: 1) temporary
>       leak in gen_op_mfcr; 2) missing set of cr[so] for gen_op_cmp32;
>       3) i32 vs. tl typing issues; 4) creqv/nand/nor/orc extra 1 bits.
> 
>  linux-user/main.c       |   4 +-
>  target-ppc/cpu.h        |  41 +++---
>  target-ppc/fpu_helper.c |  44 ++-----
>  target-ppc/helper.h     |   6 -
>  target-ppc/int_helper.c |   2 +-
>  target-ppc/machine.c    |   9 ++
>  target-ppc/translate.c  | 344 
> ++++++++++++++++++++++++++----------------------
>  7 files changed, 236 insertions(+), 214 deletions(-)
> 

Please run checkpatch.pl on this patch.  In fairness, you are modifying code
that didn't pass before, but the new lines should still be clean.

> diff --git a/linux-user/main.c b/linux-user/main.c
> index 152c031..b403f24 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
>               * PPC ABI uses overflow flag in cr0 to signal an error
>               * in syscalls.
>               */
> -            env->crf[0] &= ~0x1;
> +            env->cr[CRF_SO] = 0;
>              ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4],
>                               env->gpr[5], env->gpr[6], env->gpr[7],
>                               env->gpr[8], 0, 0);
> @@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
>                  break;
>              }
>              if (ret > (target_ulong)(-515)) {
> -                env->crf[0] |= 0x1;
> +                env->cr[CRF_SO] = 1;
>                  ret = -ret;
>              }
>              env->gpr[3] = ret;
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 91eac17..41b8299 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -940,7 +940,7 @@ struct CPUPPCState {
>      /* CTR */
>      target_ulong ctr;
>      /* condition register */
> -    uint32_t crf[8];
> +    uint32_t cr[32];
>  #if defined(TARGET_PPC64)
>      /* CFAR */
>      target_ulong cfar;
> @@ -1059,6 +1059,9 @@ struct CPUPPCState {
>      uint64_t dtl_addr, dtl_size;
>  #endif /* TARGET_PPC64 */
>  
> +    /* condition register, for migration compatibility */
> +    uint32_t crf[8];
> +
>      int error_code;
>      uint32_t pending_interrupts;
>  #if !defined(CONFIG_USER_ONLY)
> @@ -1202,8 +1205,8 @@ static inline uint32_t ppc_get_cr(const CPUPPCState 
> *env)
>      uint32_t cr = 0;
>      int i;
>  
> -    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> -        cr |= env->crf[i] << (32 - ((i + 1) * 4));
> +    for (i = 0; i < ARRAY_SIZE(env->cr); i++) {
> +        cr |= env->cr[i] << (31 - i);
>      }
>      return cr;
>  }
> @@ -1212,19 +1215,27 @@ static inline void ppc_set_cr(CPUPPCState *env, 
> uint32_t cr)
>  {
>      int i;
>  
> -    for (i = 0; i < ARRAY_SIZE(env->crf); i++) {
> -        env->crf[i] = (cr >> (32 - ((i + 1) * 4))) & 0xF;
> +    for (i = 0; i < ARRAY_SIZE(env->cr); i++) {
> +        env->cr[i] = (cr >> (31 - i)) & 1;
>      }
>  }
>  
>  static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
>  {
> -    return env->crf[i];
> +    uint32_t r;
> +    r = env->cr[i * 4];
> +    r = (r << 1) | (env->cr[i * 4 + 1]);
> +    r = (r << 1) | (env->cr[i * 4 + 2]);
> +    r = (r << 1) | (env->cr[i * 4 + 3]);
> +    return r;
>  }
>  
>  static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
>  {
> -    env->crf[i] = val;
> +    env->cr[i * 4 + 0] = (val & 0x08) != 0;
> +    env->cr[i * 4 + 1] = (val & 0x04) != 0;
> +    env->cr[i * 4 + 2] = (val & 0x02) != 0;
> +    env->cr[i * 4 + 3] = (val & 0x01) != 0;
>  }
>  
>  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
> @@ -1271,14 +1282,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
>  
>  
> /*****************************************************************************/
>  /* CRF definitions */
> -#define CRF_LT        3
> -#define CRF_GT        2
> -#define CRF_EQ        1
> -#define CRF_SO        0
> -#define CRF_CH        (1 << CRF_LT)
> -#define CRF_CL        (1 << CRF_GT)
> -#define CRF_CH_OR_CL  (1 << CRF_EQ)
> -#define CRF_CH_AND_CL (1 << CRF_SO)
> +#define CRF_LT        0
> +#define CRF_GT        1
> +#define CRF_EQ        2
> +#define CRF_SO        3
> +#define CRF_CH        CRF_LT
> +#define CRF_CL        CRF_GT
> +#define CRF_CH_OR_CL  CRF_EQ
> +#define CRF_CH_AND_CL CRF_SO

Flipping the CRF_* constants breaks what you did in patch 5, which used the
old LE (little-endian) bit numbering to perform shifts.  It also breaks other
code that still relies on the old LE convention.

Here is what I found, with suggested fixes:

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 7894dc5..3f656e5 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1043,7 +1043,7 @@ uint32_t helper_ftdiv(uint64_t fra, uint64_t frb)
         }
     }

-    return (1 << CRF_LT) | (fg_flag << CRF_GT) | (fe_flag << CRF_EQ);
+    return (1 << 3) | (fg_flag << 2) | (fe_flag << 1);
 }

 uint32_t helper_ftsqrt(uint64_t frb)
@@ -1074,7 +1074,7 @@ uint32_t helper_ftsqrt(uint64_t frb)
         }
     }

-    return (1 << CRF_LT) | (fg_flag << CRF_GT) | (fe_flag << CRF_EQ);
+    return (1 << 3) | (fg_flag << 2) | (fe_flag << 1);
 }

 void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 96f2e7d..2f88854 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2303,25 +2303,25 @@ uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, 
ppc_avr_t *b, uint32_t ps)
         if (sgna == sgnb) {
             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
             zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
-            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+            cr = (sgna > 0) ? 8 >> CRF_GT : 8 >> CRF_LT;
         } else if (bcd_cmp_mag(a, b) > 0) {
             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
             zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
-            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+            cr = (sgna > 0) ? 8 >> CRF_GT : 8 >> CRF_LT;
         } else {
             result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
             zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
-            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
+            cr = (sgnb > 0) ? 8 >> CRF_GT : 8 >> CRF_LT;
         }
     }

     if (unlikely(invalid)) {
         result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
-        cr = 1 << CRF_SO;
+        cr = 8 >> CRF_SO;
     } else if (overflow) {
-        cr |= 1 << CRF_SO;
+        cr |= 0x8 >> CRF_SO;
     } else if (zero) {
-        cr = 1 << CRF_EQ;
+        cr = 8 >> CRF_EQ;
     }

     *r = result;



>  
>  /* XER definitions */
>  #define XER_SO  31
> diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
> index 8cf321b..7894dc5 100644
> --- a/target-ppc/fpu_helper.c
> +++ b/target-ppc/fpu_helper.c
> @@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, 
> uint64_t arg2,
>      }
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
> -    env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
> -    ppc_set_crf(env, crfD, 1 << fpcc);
> +    env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
> +    ppc_set_crf(env, crfD, 0x08 >> fpcc);
>  
>      if (unlikely(fpcc == CRF_SO
>                   && (float64_is_signaling_nan(farg1.d) ||
> @@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, 
> uint64_t arg2,
>      }
>  
>      env->fpscr &= ~(0x0F << FPSCR_FPRF);
> -    env->fpscr |= (0x01 << FPSCR_FPRF) << fpcc;
> -    ppc_set_crf(env, crfD, 1 << fpcc);
> +    env->fpscr |= (0x08 << FPSCR_FPRF) >> fpcc;
> +    ppc_set_crf(env, crfD, 0x08 >> fpcc);
>  
>      if (unlikely(fpcc == CRF_SO)) {
>          if (float64_is_signaling_nan(farg1.d) ||
> @@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, 
> uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0;
> +    return float32_lt(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env, 
> uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4;
> +    return !float32_le(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env, 
> uint32_t op1, uint32_t op2)
>  
>      u1.l = op1;
>      u2.l = op2;
> -    return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0;
> +    return float32_eq(u1.f, u2.f, &env->vec_status);
>  }
>  
>  static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2)
> @@ -1460,30 +1460,6 @@ HELPER_SINGLE_SPE_CMP(fscmpgt);
>  /* efscmpeq */
>  HELPER_SINGLE_SPE_CMP(fscmpeq);
>  
> -static inline uint32_t evcmp_merge(int t0, int t1)
> -{
> -    return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1);
> -}
> -
> -#define HELPER_VECTOR_SPE_CMP(name)                                     \
> -    uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \
> -    {                                                                   \
> -        return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32),          \
> -                           e##name(env, op1, op2));                     \
> -    }
> -/* evfststlt */
> -HELPER_VECTOR_SPE_CMP(fststlt);
> -/* evfststgt */
> -HELPER_VECTOR_SPE_CMP(fststgt);
> -/* evfststeq */
> -HELPER_VECTOR_SPE_CMP(fststeq);
> -/* evfscmplt */
> -HELPER_VECTOR_SPE_CMP(fscmplt);
> -/* evfscmpgt */
> -HELPER_VECTOR_SPE_CMP(fscmpgt);
> -/* evfscmpeq */
> -HELPER_VECTOR_SPE_CMP(fscmpeq);
> -
>  /* Double-precision floating-point conversion */
>  uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val)
>  {
> @@ -1725,7 +1701,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t 
> op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0;
> +    return float64_lt(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
> @@ -1734,7 +1710,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t 
> op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4;
> +    return !float64_le(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
> @@ -1743,7 +1719,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t 
> op1, uint64_t op2)
>  
>      u1.ll = op1;
>      u2.ll = op2;
> -    return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0;
> +    return float64_eq_quiet(u1.d, u2.d, &env->vec_status);
>  }
>  
>  uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index ee748a1..dff7c1c 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -492,12 +492,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32)
>  DEF_HELPER_3(efscmplt, i32, env, i32, i32)
>  DEF_HELPER_3(efscmpgt, i32, env, i32, i32)
>  DEF_HELPER_3(efscmpeq, i32, env, i32, i32)
> -DEF_HELPER_3(evfststlt, i32, env, i64, i64)
> -DEF_HELPER_3(evfststgt, i32, env, i64, i64)
> -DEF_HELPER_3(evfststeq, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmplt, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmpgt, i32, env, i64, i64)
> -DEF_HELPER_3(evfscmpeq, i32, env, i64, i64)
>  DEF_HELPER_2(efdcfsi, i64, env, i32)
>  DEF_HELPER_2(efdcfsid, i64, env, i64)
>  DEF_HELPER_2(efdcfui, i64, env, i32)
> diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
> index b76a895..96f2e7d 100644
> --- a/target-ppc/int_helper.c
> +++ b/target-ppc/int_helper.c
> @@ -2580,7 +2580,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, 
> target_ulong high,
>   done:
>      env->xer = (env->xer & ~0x7F) | i;
>      if (update_Rc) {
> -        env->crf[0] |= xer_so;
> +        env->cr[CRF_SO] = xer_so;
>      }
>      return i;
>  }
> diff --git a/target-ppc/machine.c b/target-ppc/machine.c
> index c801b82..9fa309a 100644
> --- a/target-ppc/machine.c
> +++ b/target-ppc/machine.c
> @@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque)
>      CPUPPCState *env = &cpu->env;
>      int i;
>  
> +    for (i = 0; i < 8; i++) {
> +        env->crf[i] = ppc_get_crf(env, i);
> +    }
> +
>      env->spr[SPR_LR] = env->lr;
>      env->spr[SPR_CTR] = env->ctr;
>      env->spr[SPR_XER] = env->xer;
> @@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id)
>       * software has to take care of running QEMU in a compatible mode.
>       */
>      env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
> +
> +    for (i = 0; i < 8; i++) {
> +        ppc_set_crf(env, i, env->crf[i]);
> +    }
> +
>      env->lr = env->spr[SPR_LR];
>      env->ctr = env->spr[SPR_CTR];
>      env->xer = env->spr[SPR_XER];
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 0933c00..d8c9240 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */
>      + 10*4 + 22*5 /* FPR */
>      + 2*(10*6 + 22*7) /* AVRh, AVRl */
>      + 10*5 + 22*6 /* VSR */
> -    + 8*5 /* CRF */];
> +    + 32*8 /* CR */];
>  static TCGv cpu_gpr[32];
>  static TCGv cpu_gprh[32];
>  static TCGv_i64 cpu_fpr[32];
>  static TCGv_i64 cpu_avrh[32], cpu_avrl[32];
>  static TCGv_i64 cpu_vsr[32];
> -static TCGv_i32 cpu_crf[8];
> +static TCGv_i32 cpu_cr[32];
>  static TCGv cpu_nip;
>  static TCGv cpu_msr;
>  static TCGv cpu_ctr;
> @@ -89,12 +89,13 @@ void ppc_translate_init(void)
>      p = cpu_reg_names;
>      cpu_reg_names_size = sizeof(cpu_reg_names);
>  
> -    for (i = 0; i < 8; i++) {
> -        snprintf(p, cpu_reg_names_size, "crf%d", i);
> -        cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0,
> -                                            offsetof(CPUPPCState, crf[i]), 
> p);
> -        p += 5;
> -        cpu_reg_names_size -= 5;
> +    for (i = 0; i < 32; i++) {
> +        static const char names[] = "lt\0gt\0eq\0so";
> +        snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) 
> * 3);
> +        cpu_cr[i] = tcg_global_mem_new_i32(TCG_AREG0,
> +                                           offsetof(CPUPPCState, cr[i]), p);
> +        p += 8;
> +        cpu_reg_names_size -= 8;
>      }
>  
>      for (i = 0; i < 32; i++) {
> @@ -252,17 +253,31 @@ static inline void gen_reset_fpstatus(void)
>  
>  static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, int shift)
>  {
> -    tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
> +    TCGv_i32 t0 = tcg_temp_new_i32();
> +
> +    tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1);
> +    tcg_gen_or_i32(dest, dest, t0);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
> +    tcg_gen_or_i32(dest, dest, t0);
> +    tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);
> +    tcg_temp_free_i32(t0);
>  }

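You are missing one last OR: the final shifted bit (cpu_cr[first_cr], left in t0) is never merged into dest before t0 is freed.  Something like this: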

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 03cdd05..47ee4e3 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -260,6 +260,7 @@ static inline void gen_op_mfcr(TCGv_i32 dest, int first_cr, 
int shift)
     tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
     tcg_gen_or_i32(dest, dest, t0);
     tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);
+    tcg_gen_or_i32(dest, dest, t0);
     tcg_temp_free_i32(t0);
 }


>  
>  static inline void gen_op_mtcr(int first_cr, TCGv_i32 src, int shift)
>  {
>      if (shift) {
> -        tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
> -        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 
> 0x0F);
> +        tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift);
> +        tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1);
>      } else {
> -        tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
> +        tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1);
>      }
> +    tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1);
> +    tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1);
> +    tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2);
> +    tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1);
> +    tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3);
> +    tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1);
>  }
>  
>  static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
> @@ -663,27 +678,19 @@ static opc_handler_t invalid_handler = {
>  static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
>  {
>      TCGv t0 = tcg_temp_new();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
>  
> -    tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
>  
>      tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_LT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0);
>  
>      tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_GT);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0);
>  
>      tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
> -    tcg_gen_trunc_tl_i32(t1, t0);
> -    tcg_gen_shli_i32(t1, t1, CRF_EQ);
> -    tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0);
>  
>      tcg_temp_free(t0);
> -    tcg_temp_free_i32(t1);
>  }
>  
>  static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
> @@ -695,19 +702,26 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong 
> arg1, int s, int crf)
>  
>  static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
>  {
> -    TCGv t0, t1;
> -    t0 = tcg_temp_new();
> -    t1 = tcg_temp_new();
> -    if (s) {
> -        tcg_gen_ext32s_tl(t0, arg0);
> -        tcg_gen_ext32s_tl(t1, arg1);
> -    } else {
> -        tcg_gen_ext32u_tl(t0, arg0);
> -        tcg_gen_ext32u_tl(t1, arg1);
> -    }
> -    gen_op_cmp(t0, t1, s, crf);
> -    tcg_temp_free(t1);
> -    tcg_temp_free(t0);
> +    TCGv_i32 t0, t1;
> +
> +    t0 = tcg_temp_new_i32();
> +    t1 = tcg_temp_new_i32();
> +    tcg_gen_trunc_tl_i32(t0, arg0);
> +    tcg_gen_trunc_tl_i32(t1, arg1);
> +
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
> +
> +    tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU), 
> +                        cpu_cr[crf * 4 + CRF_LT], t0, t1);
> +
> +    tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU), 
> +                        cpu_cr[crf * 4 + CRF_GT], t0, t1);
> +
> +    tcg_gen_setcond_i32(TCG_COND_EQ, 
> +                        cpu_cr[crf * 4 + CRF_EQ], t0, t1);
> +
> +    tcg_temp_free_i32(t1);
> +    tcg_temp_free_i32(t0);
>  }
>  
>  static inline void gen_op_cmpi32(TCGv arg0, target_ulong arg1, int s, int 
> crf)
> @@ -778,15 +792,10 @@ static void gen_cmpli(DisasContext *ctx)
>  static void gen_isel(DisasContext *ctx)
>  {
>      uint32_t bi = rC(ctx->opcode);
> -    uint32_t mask;
> -    TCGv_i32 t0;
>      TCGv t1, true_op, zero;
>  
> -    mask = 0x08 >> (bi & 0x03);
> -    t0 = tcg_temp_new_i32();
> -    tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
>      t1 = tcg_temp_new();
> -    tcg_gen_extu_i32_tl(t1, t0);
> +    tcg_gen_extu_i32_tl(t1, cpu_cr[bi]);
>      zero = tcg_const_tl(0);
>      if (rA(ctx->opcode) == 0) {
>          true_op = zero;
> @@ -796,7 +805,6 @@ static void gen_isel(DisasContext *ctx)
>      tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t1, zero,
>                         true_op, cpu_gpr[rB(ctx->opcode)]);
>      tcg_temp_free(t1);
> -    tcg_temp_free_i32(t0);
>      tcg_temp_free(zero);
>  }
>  
> @@ -2318,21 +2326,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT);
>  
>  static void gen_ftdiv(DisasContext *ctx)
>  {
> +    TCGv_i32 crf;
>      if (unlikely(!ctx->fpu_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_FPU);
>          return;
>      }
> -    gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
> +    crf = tcg_temp_new_i32();
> +    gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)],
>                       cpu_fpr[rB(ctx->opcode)]);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
> +    tcg_temp_free_i32(crf);
>  }
>  
>  static void gen_ftsqrt(DisasContext *ctx)
>  {
> +    TCGv_i32 crf;
>      if (unlikely(!ctx->fpu_enabled)) {
>          gen_exception(ctx, POWERPC_EXCP_FPU);
>          return;
>      }
> -    gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
> +    crf = tcg_temp_new_i32();
> +    gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]);
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
> +    tcg_temp_free_i32(crf);
>  }
>  
>  
> @@ -3330,10 +3346,13 @@ static void gen_conditional_store(DisasContext *ctx, 
> TCGv EA,
>  {
>      int l1;
>  
> -    tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +    tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0);
> +    tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
>      l1 = gen_new_label();
>      tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
> -    tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
> +    tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1);
>  #if defined(TARGET_PPC64)
>      if (size == 8) {
>          gen_qemu_st64(ctx, cpu_gpr[reg], EA);
> @@ -3900,17 +3919,11 @@ static inline void gen_bcond(DisasContext *ctx, int 
> type)
>      if ((bo & 0x10) == 0) {
>          /* Test CR */
>          uint32_t bi = BI(ctx->opcode);
> -        uint32_t mask = 0x08 >> (bi & 0x03);
> -        TCGv_i32 temp = tcg_temp_new_i32();
> -
>          if (bo & 0x8) {
> -            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
> -            tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1);
> +            tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1);
>          } else {
> -            tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
> -            tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
> +            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1);
>          }
> -        tcg_temp_free_i32(temp);
>      }
>      gen_update_cfar(ctx, ctx->nip);
>      if (type == BCOND_IM) {
> @@ -3959,35 +3972,21 @@ static void gen_bctar(DisasContext *ctx)
>  }
>  
>  /***                      Condition register logical                       
> ***/
> -#define GEN_CRLOGIC(name, tcg_op, opc)                                       
>  \
> -static void glue(gen_, name)(DisasContext *ctx)                              
>          \
> -{                                                                            
>  \
> -    uint8_t bitmask;                                                         
>  \
> -    int sh;                                                                  
>  \
> -    TCGv_i32 t0, t1;                                                         
>  \
> -    sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03);            
>  \
> -    t0 = tcg_temp_new_i32();                                                 
>  \
> -    if (sh > 0)                                                              
>  \
> -        tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh);           
>  \
> -    else if (sh < 0)                                                         
>  \
> -        tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh);          
>  \
> -    else                                                                     
>  \
> -        tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]);                
>  \
> -    t1 = tcg_temp_new_i32();                                                 
>  \
> -    sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03);            
>  \
> -    if (sh > 0)                                                              
>  \
> -        tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh);           
>  \
> -    else if (sh < 0)                                                         
>  \
> -        tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh);          
>  \
> -    else                                                                     
>  \
> -        tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]);                
>  \
> -    tcg_op(t0, t0, t1);                                                      
>  \
> -    bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03);                            
>  \
> -    tcg_gen_andi_i32(t0, t0, bitmask);                                       
>  \
> -    tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask);         
>  \
> -    tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1);                 
>  \
> -    tcg_temp_free_i32(t0);                                                   
>  \
> -    tcg_temp_free_i32(t1);                                                   
>  \
> +#define GEN_CRLOGIC(name, tcg_op, opc)                                       
>   \
> +static void glue(gen_, name)(DisasContext *ctx)                              
>   \
> +{                                                                            
>   \
> +    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],             
>   \
> +           cpu_cr[crbB(ctx->opcode)]);                                       
>   \
> +}
> +
> +#define GEN_CRLOGIC_MASK(name, tcg_op, opc)                                  
>    \
> +static void glue(gen_, name)(DisasContext *ctx)                              
>    \
> +{                                                                            
>    \
> +    TCGv_i32 one = tcg_const_i32(1);                                         
>    \
> +    tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)],             
>    \
> +           cpu_cr[crbB(ctx->opcode)]);                                       
>    \
> +    tcg_gen_and_i32(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbD(ctx->opcode)], 
> one); \
> +    tcg_temp_free_i32(one);                                                  
>    \
>  }
>  
>  /* crand */
> @@ -3995,22 +3994,26 @@ GEN_CRLOGIC(crand, tcg_gen_and_i32, 0x08);
>  /* crandc */
>  GEN_CRLOGIC(crandc, tcg_gen_andc_i32, 0x04);
>  /* creqv */
> -GEN_CRLOGIC(creqv, tcg_gen_eqv_i32, 0x09);
> +GEN_CRLOGIC_MASK(creqv, tcg_gen_eqv_i32, 0x09);
>  /* crnand */
> -GEN_CRLOGIC(crnand, tcg_gen_nand_i32, 0x07);
> +GEN_CRLOGIC_MASK(crnand, tcg_gen_nand_i32, 0x07);
>  /* crnor */
> -GEN_CRLOGIC(crnor, tcg_gen_nor_i32, 0x01);
> +GEN_CRLOGIC_MASK(crnor, tcg_gen_nor_i32, 0x01);
>  /* cror */
>  GEN_CRLOGIC(cror, tcg_gen_or_i32, 0x0E);
>  /* crorc */
> -GEN_CRLOGIC(crorc, tcg_gen_orc_i32, 0x0D);
> +GEN_CRLOGIC_MASK(crorc, tcg_gen_orc_i32, 0x0D);
>  /* crxor */
>  GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06);
>  
>  /* mcrf */
>  static void gen_mcrf(DisasContext *ctx)
>  {
> -    tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]);
> +    int i;
> +    for (i = 0; i < 4; i++) {
> +        tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i],
> +                        cpu_cr[crfS(ctx->opcode) * 4 + i]);
> +    }
>  }
>  
>  /***                           System linkage                              
> ***/
> @@ -4163,20 +4166,12 @@ static void gen_write_xer(TCGv src)
>  /* mcrxr */
>  static void gen_mcrxr(DisasContext *ctx)
>  {
> -    TCGv_i32 t0 = tcg_temp_new_i32();
> -    TCGv_i32 t1 = tcg_temp_new_i32();
> -    TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
> -
> -    tcg_gen_trunc_tl_i32(t0, cpu_so);
> -    tcg_gen_trunc_tl_i32(t1, cpu_ov);
> -    tcg_gen_trunc_tl_i32(dst, cpu_ca);
> -    tcg_gen_shli_i32(t0, t0, 3);
> -    tcg_gen_shli_i32(t1, t1, 2);
> -    tcg_gen_shli_i32(dst, dst, 1);
> -    tcg_gen_or_i32(dst, dst, t0);
> -    tcg_gen_or_i32(dst, dst, t1);
> -    tcg_temp_free_i32(t0);
> -    tcg_temp_free_i32(t1);
> +    int crf = crfD(ctx->opcode);
> +
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov);
> +    tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca);
> +    tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0);
>  
>      tcg_gen_movi_tl(cpu_so, 0);
>      tcg_gen_movi_tl(cpu_ov, 0);
> @@ -6351,11 +6346,13 @@ static void gen_tlbsx_40x(DisasContext *ctx)
>      gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
>      tcg_temp_free(t0);
>      if (Rc(ctx->opcode)) {
> -        int l1 = gen_new_label();
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
> -        gen_set_label(l1);
> +        t0 = tcg_temp_new();
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
> +        tcg_temp_free(t0);
>      }
>  #endif
>  }
> @@ -6432,11 +6429,13 @@ static void gen_tlbsx_440(DisasContext *ctx)
>      gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
>      tcg_temp_free(t0);
>      if (Rc(ctx->opcode)) {
> -        int l1 = gen_new_label();
> -        tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
> -        tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
> -        tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
> -        gen_set_label(l1);
> +        t0 = tcg_temp_new();
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
> +        tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
> +        tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, t0, cpu_gpr[rD(ctx->opcode)], -1);
> +        tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
> +        tcg_temp_free(t0);
>      }
>  #endif
>  }
> @@ -7402,7 +7401,7 @@ GEN_VXFORM(vpmsumd, 4, 19)
>  static void gen_##op(DisasContext *ctx)             \
>  {                                                   \
>      TCGv_ptr ra, rb, rd;                            \
> -    TCGv_i32 ps;                                    \
> +    TCGv_i32 ps, crf;                               \
>                                                      \
>      if (unlikely(!ctx->altivec_enabled)) {          \
>          gen_exception(ctx, POWERPC_EXCP_VPU);       \
> @@ -7414,13 +7413,16 @@ static void gen_##op(DisasContext *ctx)             \
>      rd = gen_avr_ptr(rD(ctx->opcode));              \
>                                                      \
>      ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
> +    crf = tcg_temp_new_i32();                       \
>                                                      \
> -    gen_helper_##op(cpu_crf[6], rd, ra, rb, ps);    \
> +    gen_helper_##op(crf, rd, ra, rb, ps);           \
> +    gen_op_mtcr(6 << 2, crf, 0);                    \
>                                                      \
>      tcg_temp_free_ptr(ra);                          \
>      tcg_temp_free_ptr(rb);                          \
>      tcg_temp_free_ptr(rd);                          \
>      tcg_temp_free_i32(ps);                          \
> +    tcg_temp_free_i32(crf);                         \
>  }
>  
>  GEN_BCD(bcdadd)
> @@ -8248,6 +8250,7 @@ static void gen_##name(DisasContext *ctx)        \
>  static void gen_##name(DisasContext *ctx)         \
>  {                                                 \
>      TCGv_ptr ra, rb;                              \
> +    TCGv_i32 tmp;                                 \
>      if (unlikely(!ctx->fpu_enabled)) {            \
>          gen_exception(ctx, POWERPC_EXCP_FPU);     \
>          return;                                   \
> @@ -8255,8 +8258,10 @@ static void gen_##name(DisasContext *ctx)         \
>      gen_update_nip(ctx, ctx->nip - 4);            \
>      ra = gen_fprp_ptr(rA(ctx->opcode));           \
>      rb = gen_fprp_ptr(rB(ctx->opcode));           \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
> -                      cpu_env, ra, rb);           \
> +    tmp = tcg_temp_new_i32();                     \
> +    gen_helper_##name(tmp, cpu_env, ra, rb);      \
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
> +    tcg_temp_free_i32(tmp);                       \
>      tcg_temp_free_ptr(ra);                        \
>      tcg_temp_free_ptr(rb);                        \
>  }
> @@ -8265,7 +8270,7 @@ static void gen_##name(DisasContext *ctx)         \
>  static void gen_##name(DisasContext *ctx)         \
>  {                                                 \
>      TCGv_ptr ra;                                  \
> -    TCGv_i32 dcm;                                 \
> +    TCGv_i32 dcm, tmp;                            \
>      if (unlikely(!ctx->fpu_enabled)) {            \
>          gen_exception(ctx, POWERPC_EXCP_FPU);     \
>          return;                                   \
> @@ -8273,8 +8278,10 @@ static void gen_##name(DisasContext *ctx)         \
>      gen_update_nip(ctx, ctx->nip - 4);            \
>      ra = gen_fprp_ptr(rA(ctx->opcode));           \
>      dcm = tcg_const_i32(DCM(ctx->opcode));        \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
> -                      cpu_env, ra, dcm);          \
> +    tmp = tcg_temp_new_i32();                     \
> +    gen_helper_##name(tmp, cpu_env, ra, dcm);     \
> +    gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0);  \
> +    tcg_temp_free_i32(tmp);                       \
>      tcg_temp_free_ptr(ra);                        \
>      tcg_temp_free_i32(dcm);                       \
>  }
> @@ -8699,37 +8706,32 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32);
>  #define GEN_SPEOP_COMP(name, tcg_cond)                                       
>  \
>  static inline void gen_##name(DisasContext *ctx)                             
>  \
>  {                                                                            
>  \
> +    TCGv tmp = tcg_temp_new();                                               
>  \
> +                                                                             
>  \
>      if (unlikely(!ctx->spe_enabled)) {                                       
>  \
>          gen_exception(ctx, POWERPC_EXCP_SPEU);                               
>  \
>          return;                                                              
>  \
>      }                                                                        
>  \
> -    int l1 = gen_new_label();                                                
>  \
> -    int l2 = gen_new_label();                                                
>  \
> -    int l3 = gen_new_label();                                                
>  \
> -    int l4 = gen_new_label();                                                
>  \
>                                                                               
>  \
>      tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);   
>  \
>      tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);   
>  \
>      tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); 
>  \
>      tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); 
>  \
>                                                                               
>  \
> -    tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)],                    
>  \
> -                       cpu_gpr[rB(ctx->opcode)], l1);                        
>  \
> -    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0);                         
>  \
> -    tcg_gen_br(l2);                                                          
>  \
> -    gen_set_label(l1);                                                       
>  \
> -    tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)],                             
>  \
> -                     CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL);                 
>  \
> -    gen_set_label(l2);                                                       
>  \
> -    tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)],                   
>  \
> -                       cpu_gprh[rB(ctx->opcode)], l3);                       
>  \
> -    tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], 
>  \
> -                     ~(CRF_CH | CRF_CH_AND_CL));                             
>  \
> -    tcg_gen_br(l4);                                                          
>  \
> -    gen_set_label(l3);                                                       
>  \
> -    tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)],  
>  \
> -                    CRF_CH | CRF_CH_OR_CL);                                  
>  \
> -    gen_set_label(l4);                                                       
>  \
> +    tcg_gen_setcond_tl(tcg_cond, tmp,                                        
>  \
> +                       cpu_gpr[rA(ctx->opcode)],                             
>  \
> +                       cpu_gpr[rB(ctx->opcode)]);                            
>  \
> +    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp);       
>  \
> +    tcg_gen_setcond_tl(tcg_cond, tmp,                                        
>  \
> +                       cpu_gprh[rA(ctx->opcode)],                            
>  \
> +                       cpu_gprh[rB(ctx->opcode)]);                           
>  \
> +    tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp);       
>  \
> +    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],             
>  \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   
>  \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  
>  \
> +    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],           
>  \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                  
>  \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                 
>  \
>  }
>  GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU);
>  GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT);
> @@ -8800,22 +8802,20 @@ static inline void gen_evsel(DisasContext *ctx)
>      int l2 = gen_new_label();
>      int l3 = gen_new_label();
>      int l4 = gen_new_label();
> -    TCGv_i32 t0 = tcg_temp_local_new_i32();
> -    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
> +
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, 
> l1);
>      tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);
>      tcg_gen_br(l2);
>      gen_set_label(l1);
>      tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
>      gen_set_label(l2);
> -    tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2);
> -    tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3);
> +
> +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 
> 0, l3);
>      tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
>      tcg_gen_br(l4);
>      gen_set_label(l3);
>      tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
>      gen_set_label(l4);
> -    tcg_temp_free_i32(t0);
>  }
>  
>  static void gen_evsel0(DisasContext *ctx)
> @@ -9397,9 +9397,12 @@ static inline void gen_##name(DisasContext *ctx)       
>                        \
>      t0 = tcg_temp_new_i32();                                                 
>  \
>      t1 = tcg_temp_new_i32();                                                 
>  \
>                                                                               
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);             
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);             
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);             
>  \
>      tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                      
>  \
>      tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                      
>  \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);          
>  \
> +    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env, t0, 
> t1); \
>                                                                               
>  \
>      tcg_temp_free_i32(t0);                                                   
>  \
>      tcg_temp_free_i32(t1);                                                   
>  \
> @@ -9416,10 +9419,39 @@ static inline void gen_##name(DisasContext *ctx)      
>                         \
>      t1 = tcg_temp_new_i64();                                                 
>  \
>      gen_load_gpr64(t0, rA(ctx->opcode));                                     
>  \
>      gen_load_gpr64(t1, rB(ctx->opcode));                                     
>  \
> -    gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1);          
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0);             
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], 0);             
>  \
> +    tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0);             
>  \
> +    gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], cpu_env,       
>  \
> +                      t0, t1);                                               
>  \
>      tcg_temp_free_i64(t0);                                                   
>  \
>      tcg_temp_free_i64(t1);                                                   
>  \
>  }
> +#define GEN_SPEFPUOP_COMP_V64(name, helper)                                  
>      \
> +static inline void gen_##name(DisasContext *ctx)                             
>      \
> +{                                                                            
>      \
> +    TCGv_i32 t0, t1;                                                         
>      \
> +    if (unlikely(!ctx->spe_enabled)) {                                       
>      \
> +        gen_exception(ctx, POWERPC_EXCP_SPEU);                               
>      \
> +        return;                                                              
>      \
> +    }                                                                        
>      \
> +    t0 = tcg_temp_new_i32();                                                 
>      \
> +    t1 = tcg_temp_new_i32();                                                 
>      \
> +    tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);                      
>      \
> +    tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);                      
>      \
> +    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env, t0, 
> t1); \
> +    tcg_gen_trunc_tl_i32(t0, cpu_gprh[rA(ctx->opcode)]);                     
>      \
> +    tcg_gen_trunc_tl_i32(t1, cpu_gprh[rB(ctx->opcode)]);                     
>      \
> +    gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env, t0, 
> t1); \
> +    tcg_temp_free_i32(t0);                                                   
>      \
> +    tcg_temp_free_i32(t1);                                                   
>      \
> +    tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL],             
>      \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                   
>      \
> +                   cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                  
>      \
> +    tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL],           
>      \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH],                  
>      \
> +                    cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]);                 
>      \
> +}
>  
>  /* Single precision floating-point vectors operations */
>  /* Arithmetic */
> @@ -9474,12 +9506,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz);
>  GEN_SPEFPUOP_CONV_64_64(evfsctsiz);
>  
>  /* Comparison */
> -GEN_SPEFPUOP_COMP_64(evfscmpgt);
> -GEN_SPEFPUOP_COMP_64(evfscmplt);
> -GEN_SPEFPUOP_COMP_64(evfscmpeq);
> -GEN_SPEFPUOP_COMP_64(evfststgt);
> -GEN_SPEFPUOP_COMP_64(evfststlt);
> -GEN_SPEFPUOP_COMP_64(evfststeq);
> +GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt);
> +GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt);
> +GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq);
> +GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt);
> +GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt);
> +GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq);
>  
>  /* Opcodes definitions */
>  GEN_SPE(evfsadd,   evfssub,   0x00, 0x0A, 0x00000000, 0x00000000, 
> PPC_SPE_SINGLE); //
> 

There are some other places in the helpers where env->crf[*] is still being set.
Here are the ones that I found:

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 3f656e5..e624f97 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2141,7 +2141,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)       
              \
         }                                                               \
     }                                                                   \
                                                                         \
-    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+    ppc_set_crf(env, BF(opcode),                                        \
+                0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0));           \
 }

 VSX_TDIV(xstdivdp, 1, float64, VsrD(0), -1022, 1023, 52)
@@ -2195,7 +2196,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)       
              \
         }                                                               \
     }                                                                   \
                                                                         \
-    env->crf[BF(opcode)] = 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0); \
+    ppc_set_crf(env, BF(opcode),                                        \
+                0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0));           \
 }

 VSX_TSQRT(xstsqrtdp, 1, float64, VsrD(0), -1022, 52)
@@ -2358,7 +2360,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)       
               \
                                                                          \
     env->fpscr &= ~(0x0F << FPSCR_FPRF);                                 \
     env->fpscr |= cc << FPSCR_FPRF;                                      \
-    env->crf[BF(opcode)] = cc;                                           \
+    ppc_set_crf(env, BF(opcode), cc);                                   \
                                                                          \
     helper_float_check_status(env);                                      \
 }
@@ -2450,7 +2452,8 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)       
                \
                                                                           \
     putVSR(xT(opcode), &xt, env);                                         \
     if ((opcode >> (31-21)) & 1) {                                        \
-        env->crf[6] = (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0);       \
+        ppc_set_crf(env, 6,                                               \
+                    (all_true ? 0x8 : 0) | (all_false ? 0x2 : 0));        \
     }                                                                     \
     helper_float_check_status(env);                                       \
  }



Note that I do not have the capability of testing any of the SPE instructions.




reply via email to

[Prev in Thread] Current Thread [Next in Thread]