qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH v2 03/26] tcg-ppc64: Move functions around


From: Richard Henderson
Subject: [Qemu-devel] [PATCH v2 03/26] tcg-ppc64: Move functions around
Date: Tue, 27 May 2014 14:26:12 -0700

Code movement only.  This will allow us to make use of the
other tcg_out_* functions in tidying their implementations.

Signed-off-by: Richard Henderson <address@hidden>
---
 tcg/ppc64/tcg-target.c | 1178 ++++++++++++++++++++++++------------------------
 1 file changed, 589 insertions(+), 589 deletions(-)

diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 2f60924..951a392 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -710,39 +710,6 @@ static void tcg_out_b(TCGContext *s, int mask, tcg_insn_unit *target)
     }
 }
 
-static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
-{
-#ifdef __APPLE__
-    tcg_out_b(s, LK, target);
-#else
-    /* Look through the descriptor.  If the branch is in range, and we
-       don't have to spend too much effort on building the toc.  */
-    void *tgt = ((void **)target)[0];
-    uintptr_t toc = ((uintptr_t *)target)[1];
-    intptr_t diff = tcg_pcrel_diff(s, tgt);
-
-    if (in_range_b(diff) && toc == (uint32_t)toc) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, toc);
-        tcg_out_b(s, LK, tgt);
-    } else {
-        /* Fold the low bits of the constant into the addresses below.  */
-        intptr_t arg = (intptr_t)target;
-        int ofs = (int16_t)arg;
-
-        if (ofs + 8 < 0x8000) {
-            arg -= ofs;
-        } else {
-            ofs = 0;
-        }
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg);
-        tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R2, ofs));
-        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
-        tcg_out32(s, LD | TAI(TCG_REG_R2, TCG_REG_R2, ofs + 8));
-        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
-    }
-#endif
-}
-
 static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
                              TCGReg base, tcg_target_long offset)
 {
@@ -795,680 +762,713 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int opx, TCGReg rt,
     }
 }
 
-static const uint32_t qemu_ldx_opc[16] = {
-    [MO_UB] = LBZX,
-    [MO_UW] = LHZX,
-    [MO_UL] = LWZX,
-    [MO_Q]  = LDX,
-    [MO_SW] = LHAX,
-    [MO_SL] = LWAX,
-    [MO_BSWAP | MO_UB] = LBZX,
-    [MO_BSWAP | MO_UW] = LHBRX,
-    [MO_BSWAP | MO_UL] = LWBRX,
-    [MO_BSWAP | MO_Q]  = LDBRX,
-};
-
-static const uint32_t qemu_stx_opc[16] = {
-    [MO_UB] = STBX,
-    [MO_UW] = STHX,
-    [MO_UL] = STWX,
-    [MO_Q]  = STDX,
-    [MO_BSWAP | MO_UB] = STBX,
-    [MO_BSWAP | MO_UW] = STHBRX,
-    [MO_BSWAP | MO_UL] = STWBRX,
-    [MO_BSWAP | MO_Q]  = STDBRX,
-};
-
-static const uint32_t qemu_exts_opc[4] = {
-    EXTSB, EXTSH, EXTSW, 0
-};
-
-#if defined (CONFIG_SOFTMMU)
-/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- *                                 int mmu_idx, uintptr_t ra)
- */
-static void * const qemu_ld_helpers[16] = {
-    [MO_UB]   = helper_ret_ldub_mmu,
-    [MO_LEUW] = helper_le_lduw_mmu,
-    [MO_LEUL] = helper_le_ldul_mmu,
-    [MO_LEQ]  = helper_le_ldq_mmu,
-    [MO_BEUW] = helper_be_lduw_mmu,
-    [MO_BEUL] = helper_be_ldul_mmu,
-    [MO_BEQ]  = helper_be_ldq_mmu,
-};
-
-/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
- */
-static void * const qemu_st_helpers[16] = {
-    [MO_UB]   = helper_ret_stb_mmu,
-    [MO_LEUW] = helper_le_stw_mmu,
-    [MO_LEUL] = helper_le_stl_mmu,
-    [MO_LEQ]  = helper_le_stq_mmu,
-    [MO_BEUW] = helper_be_stw_mmu,
-    [MO_BEUL] = helper_be_stl_mmu,
-    [MO_BEQ]  = helper_be_stq_mmu,
-};
+static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+                              TCGReg arg1, intptr_t arg2)
+{
+    int opi, opx;
 
-/* Perform the TLB load and compare.  Places the result of the comparison
-   in CR7, loads the addend of the TLB into R3, and returns the register
-   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
+    if (type == TCG_TYPE_I32) {
+        opi = LWZ, opx = LWZX;
+    } else {
+        opi = LD, opx = LDX;
+    }
+    tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
+}
 
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, TCGReg addr_reg,
-                               int mem_index, bool is_read)
+static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+                              TCGReg arg1, intptr_t arg2)
 {
-    int cmp_off
-        = (is_read
-           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
-           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
-    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
-    TCGReg base = TCG_AREG0;
+    int opi, opx;
 
-    /* Extract the page index, shifted into place for tlb index.  */
-    if (TARGET_LONG_BITS == 32) {
-        /* Zero-extend the address into a place helpful for further use.  */
-        tcg_out_ext32u(s, TCG_REG_R4, addr_reg);
-        addr_reg = TCG_REG_R4;
+    if (type == TCG_TYPE_I32) {
+        opi = STW, opx = STWX;
     } else {
-        tcg_out_rld(s, RLDICL, TCG_REG_R3, addr_reg,
-                    64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS);
+        opi = STD, opx = STDX;
     }
+    tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
+}
 
-    /* Compensate for very large offsets.  */
-    if (add_off >= 0x8000) {
-        /* Most target env are smaller than 32k; none are larger than 64k.
-           Simplify the logic here merely to offset by 0x7ff0, giving us a
-           range just shy of 64k.  Check this assumption.  */
-        QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
-                                   tlb_table[NB_MMU_MODES - 1][1])
-                          > 0x7ff0 + 0x7fff);
-        tcg_out32(s, ADDI | TAI(TCG_REG_R2, base, 0x7ff0));
-        base = TCG_REG_R2;
-        cmp_off -= 0x7ff0;
-        add_off -= 0x7ff0;
-    }
+static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
+                        int const_arg2, int cr, TCGType type)
+{
+    int imm;
+    uint32_t op;
 
-    /* Extraction and shifting, part 2.  */
-    if (TARGET_LONG_BITS == 32) {
-        tcg_out_rlw(s, RLWINM, TCG_REG_R3, addr_reg,
-                    32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
-                    32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
-                    31 - CPU_TLB_ENTRY_BITS);
-    } else {
-        tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS);
+    /* Simplify the comparisons below wrt CMPI.  */
+    if (type == TCG_TYPE_I32) {
+        arg2 = (int32_t)arg2;
     }
 
-    tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base));
+    switch (cond) {
+    case TCG_COND_EQ:
+    case TCG_COND_NE:
+        if (const_arg2) {
+            if ((int16_t) arg2 == arg2) {
+                op = CMPI;
+                imm = 1;
+                break;
+            } else if ((uint16_t) arg2 == arg2) {
+                op = CMPLI;
+                imm = 1;
+                break;
+            }
+        }
+        op = CMPL;
+        imm = 0;
+        break;
 
-    /* Load the tlb comparator.  */
-    tcg_out32(s, LD_ADDR | TAI(TCG_REG_R2, TCG_REG_R3, cmp_off));
+    case TCG_COND_LT:
+    case TCG_COND_GE:
+    case TCG_COND_LE:
+    case TCG_COND_GT:
+        if (const_arg2) {
+            if ((int16_t) arg2 == arg2) {
+                op = CMPI;
+                imm = 1;
+                break;
+            }
+        }
+        op = CMP;
+        imm = 0;
+        break;
 
-    /* Load the TLB addend for use on the fast path.  Do this asap
-       to minimize any load use delay.  */
-    tcg_out32(s, LD | TAI(TCG_REG_R3, TCG_REG_R3, add_off));
+    case TCG_COND_LTU:
+    case TCG_COND_GEU:
+    case TCG_COND_LEU:
+    case TCG_COND_GTU:
+        if (const_arg2) {
+            if ((uint16_t) arg2 == arg2) {
+                op = CMPLI;
+                imm = 1;
+                break;
+            }
+        }
+        op = CMPL;
+        imm = 0;
+        break;
 
-    /* Clear the non-page, non-alignment bits from the address.  */
-    if (TARGET_LONG_BITS == 32) {
-        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr_reg, 0,
-                    (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
-    } else if (!s_bits) {
-        tcg_out_rld(s, RLDICR, TCG_REG_R0, addr_reg, 0, 63 - TARGET_PAGE_BITS);
-    } else {
-        tcg_out_rld(s, RLDICL, TCG_REG_R0, addr_reg,
-                    64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
-        tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
+    default:
+        tcg_abort();
     }
+    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
 
-    tcg_out32(s, CMP | BF(7) | RA(TCG_REG_R0) | RB(TCG_REG_R2) | CMP_L);
-
-    return addr_reg;
+    if (imm) {
+        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
+    } else {
+        if (const_arg2) {
+            tcg_out_movi(s, type, TCG_REG_R0, arg2);
+            arg2 = TCG_REG_R0;
+        }
+        tcg_out32(s, op | RA(arg1) | RB(arg2));
+    }
 }
 
-/* Record the context of a call to the out of line helper code for the slow
-   path for a load or store, so that we can later generate the correct
-   helper code.  */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
-                                int data_reg, int addr_reg, int mem_index,
-                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
+static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
+                                TCGReg dst, TCGReg src)
 {
-    TCGLabelQemuLdst *label = new_ldst_label(s);
-
-    label->is_ld = is_ld;
-    label->opc = opc;
-    label->datalo_reg = data_reg;
-    label->addrlo_reg = addr_reg;
-    label->mem_index = mem_index;
-    label->raddr = raddr;
-    label->label_ptr[0] = label_ptr;
+    tcg_out32(s, (type == TCG_TYPE_I64 ? CNTLZD : CNTLZW) | RS(src) | RA(dst));
+    tcg_out_shri64(s, dst, dst, type == TCG_TYPE_I64 ? 6 : 5);
 }
 
-static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
 {
-    TCGMemOp opc = lb->opc;
-
-    reloc_pc14(lb->label_ptr[0], s->code_ptr);
-
-    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0);
-
-    /* If the address needed to be zero-extended, we'll have already
-       placed it in R4.  The only remaining case is 64-bit guest.  */
-    tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg);
-
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index);
-    tcg_out32(s, MFSPR | RT(TCG_REG_R6) | LR);
-
-    tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
-
-    if (opc & MO_SIGN) {
-        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
-        tcg_out32(s, insn | RA(lb->datalo_reg) | RS(TCG_REG_R3));
+    /* X != 0 implies X + -1 generates a carry.  Extra addition
+       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
+    if (dst != src) {
+        tcg_out32(s, ADDIC | TAI(dst, src, -1));
+        tcg_out32(s, SUBFE | TAB(dst, dst, src));
     } else {
-        tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3);
+        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
+        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
     }
+}
 
-    tcg_out_b(s, 0, lb->raddr);
+static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
+                                  bool const_arg2)
+{
+    if (const_arg2) {
+        if ((uint32_t)arg2 == arg2) {
+            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
+            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
+        }
+    } else {
+        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
+    }
+    return TCG_REG_R0;
 }
 
-static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
+                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
+                            int const_arg2)
 {
-    TCGMemOp opc = lb->opc;
-    TCGMemOp s_bits = opc & MO_SIZE;
+    int crop, sh;
 
-    reloc_pc14(lb->label_ptr[0], s->code_ptr);
+    /* Ignore high bits of a potential constant arg2.  */
+    if (type == TCG_TYPE_I32) {
+        arg2 = (uint32_t)arg2;
+    }
 
-    tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0);
+    /* Handle common and trivial cases before handling anything else.  */
+    if (arg2 == 0) {
+        switch (cond) {
+        case TCG_COND_EQ:
+            tcg_out_setcond_eq0(s, type, arg0, arg1);
+            return;
+        case TCG_COND_NE:
+            if (type == TCG_TYPE_I32) {
+                tcg_out_ext32u(s, TCG_REG_R0, arg1);
+                arg1 = TCG_REG_R0;
+            }
+            tcg_out_setcond_ne0(s, arg0, arg1);
+            return;
+        case TCG_COND_GE:
+            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
+            arg1 = arg0;
+            /* FALLTHRU */
+        case TCG_COND_LT:
+            /* Extract the sign bit.  */
+            tcg_out_rld(s, RLDICL, arg0, arg1,
+                        type == TCG_TYPE_I64 ? 1 : 33, 63);
+            return;
+        default:
+            break;
+        }
+    }
 
-    /* If the address needed to be zero-extended, we'll have already
-       placed it in R4.  The only remaining case is 64-bit guest.  */
-    tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg);
+    /* If we have ISEL, we can implement everything with 3 or 4 insns.
+       All other cases below are also at least 3 insns, so speed up the
+       code generator by not considering them and always using ISEL.  */
+    if (HAVE_ISEL) {
+        int isel, tab;
 
-    tcg_out_rld(s, RLDICL, TCG_REG_R5, lb->datalo_reg,
-                0, 64 - (1 << (3 + s_bits)));
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index);
-    tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR);
+        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
 
-    tcg_out_call(s, qemu_st_helpers[opc]);
+        isel = tcg_to_isel[cond];
 
-    tcg_out_b(s, 0, lb->raddr);
-}
-#endif /* SOFTMMU */
+        tcg_out_movi(s, type, arg0, 1);
+        if (isel & 1) {
+            /* arg0 = (bc ? 0 : 1) */
+            tab = TAB(arg0, 0, arg0);
+            isel &= ~1;
+        } else {
+            /* arg0 = (bc ? 1 : 0) */
+            tcg_out_movi(s, type, TCG_REG_R0, 0);
+            tab = TAB(arg0, arg0, TCG_REG_R0);
+        }
+        tcg_out32(s, isel | tab);
+        return;
+    }
 
-static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
-                            TCGMemOp opc, int mem_index)
-{
-    TCGReg rbase;
-    uint32_t insn;
-    TCGMemOp s_bits = opc & MO_SIZE;
-#ifdef CONFIG_SOFTMMU
-    tcg_insn_unit *label_ptr;
-#endif
+    switch (cond) {
+    case TCG_COND_EQ:
+        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
+        tcg_out_setcond_eq0(s, type, arg0, arg1);
+        return;
 
-#ifdef CONFIG_SOFTMMU
-    addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true);
+    case TCG_COND_NE:
+        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
+        /* Discard the high bits only once, rather than both inputs.  */
+        if (type == TCG_TYPE_I32) {
+            tcg_out_ext32u(s, TCG_REG_R0, arg1);
+            arg1 = TCG_REG_R0;
+        }
+        tcg_out_setcond_ne0(s, arg0, arg1);
+        return;
 
-    /* Load a pointer into the current opcode w/conditional branch-link. */
-    label_ptr = s->code_ptr;
-    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        sh = 30;
+        crop = 0;
+        goto crtest;
 
-    rbase = TCG_REG_R3;
-#else  /* !CONFIG_SOFTMMU */
-    rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
-    if (TARGET_LONG_BITS == 32) {
-        tcg_out_ext32u(s, TCG_REG_R2, addr_reg);
-        addr_reg = TCG_REG_R2;
-    }
-#endif
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+        sh = 29;
+        crop = 0;
+        goto crtest;
 
-    insn = qemu_ldx_opc[opc];
-    if (!HAVE_ISA_2_06 && insn == LDBRX) {
-        tcg_out32(s, ADDI | TAI(TCG_REG_R0, addr_reg, 4));
-        tcg_out32(s, LWBRX | TAB(data_reg, rbase, addr_reg));
-        tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
-        tcg_out_rld(s, RLDIMI, data_reg, TCG_REG_R0, 32, 0);
-    } else if (insn) {
-        tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg));
-    } else {
-        insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
-        tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg));
-        insn = qemu_exts_opc[s_bits];
-        tcg_out32(s, insn | RA(data_reg) | RS(data_reg));
-    }
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+        sh = 31;
+        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
+        goto crtest;
 
-#ifdef CONFIG_SOFTMMU
-    add_qemu_ldst_label(s, true, opc, data_reg, addr_reg, mem_index,
-                        s->code_ptr, label_ptr);
-#endif
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        sh = 31;
+        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
+    crtest:
+        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+        if (crop) {
+            tcg_out32(s, crop);
+        }
+        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
+        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
+        break;
+
+    default:
+        tcg_abort();
+    }
 }
 
-static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
-                            TCGMemOp opc, int mem_index)
+static void tcg_out_bc(TCGContext *s, int bc, int label_index)
 {
-    TCGReg rbase;
-    uint32_t insn;
-#ifdef CONFIG_SOFTMMU
-    tcg_insn_unit *label_ptr;
-#endif
-
-#ifdef CONFIG_SOFTMMU
-    addr_reg = tcg_out_tlb_read(s, opc & MO_SIZE, addr_reg, mem_index, false);
-
-    /* Load a pointer into the current opcode w/conditional branch-link. */
-    label_ptr = s->code_ptr;
-    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
-
-    rbase = TCG_REG_R3;
-#else  /* !CONFIG_SOFTMMU */
-    rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
-    if (TARGET_LONG_BITS == 32) {
-        tcg_out_ext32u(s, TCG_REG_R2, addr_reg);
-        addr_reg = TCG_REG_R2;
-    }
-#endif
+    TCGLabel *l = &s->labels[label_index];
 
-    insn = qemu_stx_opc[opc];
-    if (!HAVE_ISA_2_06 && insn == STDBRX) {
-        tcg_out32(s, STWBRX | SAB(data_reg, rbase, addr_reg));
-        tcg_out32(s, ADDI | TAI(TCG_REG_R2, addr_reg, 4));
-        tcg_out_shri64(s, TCG_REG_R0, data_reg, 32);
-        tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_R2));
+    if (l->has_value) {
+        tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr));
     } else {
-        tcg_out32(s, insn | SAB(data_reg, rbase, addr_reg));
+        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0);
+        tcg_out_bc_noaddr(s, bc);
     }
-
-#ifdef CONFIG_SOFTMMU
-    add_qemu_ldst_label(s, false, opc, data_reg, addr_reg, mem_index,
-                        s->code_ptr, label_ptr);
-#endif
 }
 
-#define FRAME_SIZE ((int) \
-    ((8                     /* back chain */              \
-      + 8                   /* CR */                      \
-      + 8                   /* LR */                      \
-      + 8                   /* compiler doubleword */     \
-      + 8                   /* link editor doubleword */  \
-      + 8                   /* TOC save area */           \
-      + TCG_STATIC_CALL_ARGS_SIZE                         \
-      + CPU_TEMP_BUF_NLONGS * sizeof(long)                \
-      + ARRAY_SIZE(tcg_target_callee_save_regs) * 8       \
-      + 15) & ~15))
-
-#define REG_SAVE_BOT (FRAME_SIZE - ARRAY_SIZE(tcg_target_callee_save_regs) * 8)
-
-static void tcg_target_qemu_prologue(TCGContext *s)
+static void tcg_out_brcond(TCGContext *s, TCGCond cond,
+                           TCGArg arg1, TCGArg arg2, int const_arg2,
+                           int label_index, TCGType type)
 {
-    int i;
+    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+    tcg_out_bc(s, tcg_to_bc[cond], label_index);
+}
 
-    tcg_set_frame(s, TCG_REG_CALL_STACK,
-                  REG_SAVE_BOT - CPU_TEMP_BUF_NLONGS * sizeof(long),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
-
-#ifndef __APPLE__
-    /* First emit adhoc function descriptor */
-    tcg_out64(s, (uint64_t)s->code_ptr + 24); /* entry point */
-    tcg_out64(s, 0);                          /* toc */
-    tcg_out64(s, 0);                          /* environment pointer */
-#endif
-
-    /* Prologue */
-    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
-    tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
-    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
-        tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1, 
-                               REG_SAVE_BOT + i * 8));
-    }
-    tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16));
-
-#ifdef CONFIG_USE_GUEST_BASE
-    if (GUEST_BASE) {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
-        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
+                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
+                            TCGArg v2, bool const_c2)
+{
+    /* If for some reason both inputs are zero, don't produce bad code.  */
+    if (v1 == 0 && v2 == 0) {
+        tcg_out_movi(s, type, dest, 0);
+        return;
     }
-#endif
 
-    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
-    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
-    tcg_out32(s, BCCTR | BO_ALWAYS);
+    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
 
-    /* Epilogue */
-    tb_ret_addr = s->code_ptr;
+    if (HAVE_ISEL) {
+        int isel = tcg_to_isel[cond];
 
-    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
-        tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], TCG_REG_R1,
-                              REG_SAVE_BOT + i * 8));
+        /* Swap the V operands if the operation indicates inversion.  */
+        if (isel & 1) {
+            int t = v1;
+            v1 = v2;
+            v2 = t;
+            isel &= ~1;
+        }
+        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
+        if (v2 == 0) {
+            tcg_out_movi(s, type, TCG_REG_R0, 0);
+        }
+        tcg_out32(s, isel | TAB(dest, v1, v2));
+    } else {
+        if (dest == v2) {
+            cond = tcg_invert_cond(cond);
+            v2 = v1;
+        } else if (dest != v1) {
+            if (v1 == 0) {
+                tcg_out_movi(s, type, dest, 0);
+            } else {
+                tcg_out_mov(s, type, dest, v1);
+            }
+        }
+        /* Branch forward over one insn */
+        tcg_out32(s, tcg_to_bc[cond] | 8);
+        if (v2 == 0) {
+            tcg_out_movi(s, type, dest, 0);
+        } else {
+            tcg_out_mov(s, type, dest, v2);
+        }
     }
-    tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16));
-    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
-    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
-    tcg_out32(s, BCLR | BO_ALWAYS);
 }
 
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
-                              TCGReg arg1, intptr_t arg2)
+void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 {
-    int opi, opx;
+    TCGContext s;
 
-    if (type == TCG_TYPE_I32) {
-        opi = LWZ, opx = LWZX;
-    } else {
-        opi = LD, opx = LDX;
-    }
-    tcg_out_mem_long(s, opi, opx, ret, arg1, arg2);
+    s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
+    tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
+    flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
 }
 
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
-                              TCGReg arg1, intptr_t arg2)
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
 {
-    int opi, opx;
+#ifdef __APPLE__
+    tcg_out_b(s, LK, target);
+#else
+    /* Look through the descriptor.  If the branch is in range, and we
+       don't have to spend too much effort on building the toc.  */
+    void *tgt = ((void **)target)[0];
+    uintptr_t toc = ((uintptr_t *)target)[1];
+    intptr_t diff = tcg_pcrel_diff(s, tgt);
 
-    if (type == TCG_TYPE_I32) {
-        opi = STW, opx = STWX;
+    if (in_range_b(diff) && toc == (uint32_t)toc) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, toc);
+        tcg_out_b(s, LK, tgt);
     } else {
-        opi = STD, opx = STDX;
+        /* Fold the low bits of the constant into the addresses below.  */
+        intptr_t arg = (intptr_t)target;
+        int ofs = (int16_t)arg;
+
+        if (ofs + 8 < 0x8000) {
+            arg -= ofs;
+        } else {
+            ofs = 0;
+        }
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, arg);
+        tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R2, ofs));
+        tcg_out32(s, MTSPR | RA(TCG_REG_R0) | CTR);
+        tcg_out32(s, LD | TAI(TCG_REG_R2, TCG_REG_R2, ofs + 8));
+        tcg_out32(s, BCCTR | BO_ALWAYS | LK);
     }
-    tcg_out_mem_long(s, opi, opx, arg, arg1, arg2);
+#endif
 }
 
-static void tcg_out_cmp(TCGContext *s, int cond, TCGArg arg1, TCGArg arg2,
-                        int const_arg2, int cr, TCGType type)
-{
-    int imm;
-    uint32_t op;
+static const uint32_t qemu_ldx_opc[16] = {
+    [MO_UB] = LBZX,
+    [MO_UW] = LHZX,
+    [MO_UL] = LWZX,
+    [MO_Q]  = LDX,
+    [MO_SW] = LHAX,
+    [MO_SL] = LWAX,
+    [MO_BSWAP | MO_UB] = LBZX,
+    [MO_BSWAP | MO_UW] = LHBRX,
+    [MO_BSWAP | MO_UL] = LWBRX,
+    [MO_BSWAP | MO_Q]  = LDBRX,
+};
 
-    /* Simplify the comparisons below wrt CMPI.  */
-    if (type == TCG_TYPE_I32) {
-        arg2 = (int32_t)arg2;
-    }
+static const uint32_t qemu_stx_opc[16] = {
+    [MO_UB] = STBX,
+    [MO_UW] = STHX,
+    [MO_UL] = STWX,
+    [MO_Q]  = STDX,
+    [MO_BSWAP | MO_UB] = STBX,
+    [MO_BSWAP | MO_UW] = STHBRX,
+    [MO_BSWAP | MO_UL] = STWBRX,
+    [MO_BSWAP | MO_Q]  = STDBRX,
+};
 
-    switch (cond) {
-    case TCG_COND_EQ:
-    case TCG_COND_NE:
-        if (const_arg2) {
-            if ((int16_t) arg2 == arg2) {
-                op = CMPI;
-                imm = 1;
-                break;
-            } else if ((uint16_t) arg2 == arg2) {
-                op = CMPLI;
-                imm = 1;
-                break;
-            }
-        }
-        op = CMPL;
-        imm = 0;
-        break;
+static const uint32_t qemu_exts_opc[4] = {
+    EXTSB, EXTSH, EXTSW, 0
+};
 
-    case TCG_COND_LT:
-    case TCG_COND_GE:
-    case TCG_COND_LE:
-    case TCG_COND_GT:
-        if (const_arg2) {
-            if ((int16_t) arg2 == arg2) {
-                op = CMPI;
-                imm = 1;
-                break;
-            }
-        }
-        op = CMP;
-        imm = 0;
-        break;
+#if defined (CONFIG_SOFTMMU)
+/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
+ *                                 int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[16] = {
+    [MO_UB]   = helper_ret_ldub_mmu,
+    [MO_LEUW] = helper_le_lduw_mmu,
+    [MO_LEUL] = helper_le_ldul_mmu,
+    [MO_LEQ]  = helper_le_ldq_mmu,
+    [MO_BEUW] = helper_be_lduw_mmu,
+    [MO_BEUL] = helper_be_ldul_mmu,
+    [MO_BEQ]  = helper_be_ldq_mmu,
+};
 
-    case TCG_COND_LTU:
-    case TCG_COND_GEU:
-    case TCG_COND_LEU:
-    case TCG_COND_GTU:
-        if (const_arg2) {
-            if ((uint16_t) arg2 == arg2) {
-                op = CMPLI;
-                imm = 1;
-                break;
-            }
-        }
-        op = CMPL;
-        imm = 0;
-        break;
+/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
+ *                                 uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
+static void * const qemu_st_helpers[16] = {
+    [MO_UB]   = helper_ret_stb_mmu,
+    [MO_LEUW] = helper_le_stw_mmu,
+    [MO_LEUL] = helper_le_stl_mmu,
+    [MO_LEQ]  = helper_le_stq_mmu,
+    [MO_BEUW] = helper_be_stw_mmu,
+    [MO_BEUL] = helper_be_stl_mmu,
+    [MO_BEQ]  = helper_be_stq_mmu,
+};
 
-    default:
-        tcg_abort();
+/* Perform the TLB load and compare.  Places the result of the comparison
+   in CR7, loads the addend of the TLB into R3, and returns the register
+   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
+
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, TCGReg addr_reg,
+                               int mem_index, bool is_read)
+{
+    int cmp_off
+        = (is_read
+           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    TCGReg base = TCG_AREG0;
+
+    /* Extract the page index, shifted into place for tlb index.  */
+    if (TARGET_LONG_BITS == 32) {
+        /* Zero-extend the address into a place helpful for further use.  */
+        tcg_out_ext32u(s, TCG_REG_R4, addr_reg);
+        addr_reg = TCG_REG_R4;
+    } else {
+        tcg_out_rld(s, RLDICL, TCG_REG_R3, addr_reg,
+                    64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS);
     }
-    op |= BF(cr) | ((type == TCG_TYPE_I64) << 21);
 
-    if (imm) {
-        tcg_out32(s, op | RA(arg1) | (arg2 & 0xffff));
+    /* Compensate for very large offsets.  */
+    if (add_off >= 0x8000) {
+        /* Most target env are smaller than 32k; none are larger than 64k.
+           Simplify the logic here merely to offset by 0x7ff0, giving us a
+           range just shy of 64k.  Check this assumption.  */
+        QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
+                                   tlb_table[NB_MMU_MODES - 1][1])
+                          > 0x7ff0 + 0x7fff);
+        tcg_out32(s, ADDI | TAI(TCG_REG_R2, base, 0x7ff0));
+        base = TCG_REG_R2;
+        cmp_off -= 0x7ff0;
+        add_off -= 0x7ff0;
+    }
+
+    /* Extraction and shifting, part 2.  */
+    if (TARGET_LONG_BITS == 32) {
+        tcg_out_rlw(s, RLWINM, TCG_REG_R3, addr_reg,
+                    32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
+                    32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
+                    31 - CPU_TLB_ENTRY_BITS);
     } else {
-        if (const_arg2) {
-            tcg_out_movi(s, type, TCG_REG_R0, arg2);
-            arg2 = TCG_REG_R0;
-        }
-        tcg_out32(s, op | RA(arg1) | RB(arg2));
+        tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS);
     }
-}
 
-static void tcg_out_setcond_eq0(TCGContext *s, TCGType type,
-                                TCGReg dst, TCGReg src)
-{
-    tcg_out32(s, (type == TCG_TYPE_I64 ? CNTLZD : CNTLZW) | RS(src) | RA(dst));
-    tcg_out_shri64(s, dst, dst, type == TCG_TYPE_I64 ? 6 : 5);
-}
+    tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base));
 
-static void tcg_out_setcond_ne0(TCGContext *s, TCGReg dst, TCGReg src)
-{
-    /* X != 0 implies X + -1 generates a carry.  Extra addition
-       trickery means: R = X-1 + ~X + C = X-1 + (-X+1) + C = C.  */
-    if (dst != src) {
-        tcg_out32(s, ADDIC | TAI(dst, src, -1));
-        tcg_out32(s, SUBFE | TAB(dst, dst, src));
+    /* Load the tlb comparator.  */
+    tcg_out32(s, LD_ADDR | TAI(TCG_REG_R2, TCG_REG_R3, cmp_off));
+
+    /* Load the TLB addend for use on the fast path.  Do this asap
+       to minimize any load use delay.  */
+    tcg_out32(s, LD | TAI(TCG_REG_R3, TCG_REG_R3, add_off));
+
+    /* Clear the non-page, non-alignment bits from the address.  */
+    if (TARGET_LONG_BITS == 32) {
+        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr_reg, 0,
+                    (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
+    } else if (!s_bits) {
+        tcg_out_rld(s, RLDICR, TCG_REG_R0, addr_reg, 0, 63 - TARGET_PAGE_BITS);
     } else {
-        tcg_out32(s, ADDIC | TAI(TCG_REG_R0, src, -1));
-        tcg_out32(s, SUBFE | TAB(dst, TCG_REG_R0, src));
+        tcg_out_rld(s, RLDICL, TCG_REG_R0, addr_reg,
+                    64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits);
+        tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
     }
+
+    tcg_out32(s, CMP | BF(7) | RA(TCG_REG_R0) | RB(TCG_REG_R2) | CMP_L);
+
+    return addr_reg;
 }
 
-static TCGReg tcg_gen_setcond_xor(TCGContext *s, TCGReg arg1, TCGArg arg2,
-                                  bool const_arg2)
+/* Record the context of a call to the out of line helper code for the slow
+   path for a load or store, so that we can later generate the correct
+   helper code.  */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
+                                int data_reg, int addr_reg, int mem_index,
+                                tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
 {
-    if (const_arg2) {
-        if ((uint32_t)arg2 == arg2) {
-            tcg_out_xori32(s, TCG_REG_R0, arg1, arg2);
-        } else {
-            tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R0, arg2);
-            tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, TCG_REG_R0));
-        }
-    } else {
-        tcg_out32(s, XOR | SAB(arg1, TCG_REG_R0, arg2));
-    }
-    return TCG_REG_R0;
+    TCGLabelQemuLdst *label = new_ldst_label(s);
+
+    label->is_ld = is_ld;
+    label->opc = opc;
+    label->datalo_reg = data_reg;
+    label->addrlo_reg = addr_reg;
+    label->mem_index = mem_index;
+    label->raddr = raddr;
+    label->label_ptr[0] = label_ptr;
 }
 
-static void tcg_out_setcond(TCGContext *s, TCGType type, TCGCond cond,
-                            TCGArg arg0, TCGArg arg1, TCGArg arg2,
-                            int const_arg2)
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-    int crop, sh;
+    TCGMemOp opc = lb->opc;
 
-    /* Ignore high bits of a potential constant arg2.  */
-    if (type == TCG_TYPE_I32) {
-        arg2 = (uint32_t)arg2;
-    }
+    reloc_pc14(lb->label_ptr[0], s->code_ptr);
 
-    /* Handle common and trivial cases before handling anything else.  */
-    if (arg2 == 0) {
-        switch (cond) {
-        case TCG_COND_EQ:
-            tcg_out_setcond_eq0(s, type, arg0, arg1);
-            return;
-        case TCG_COND_NE:
-            if (type == TCG_TYPE_I32) {
-                tcg_out_ext32u(s, TCG_REG_R0, arg1);
-                arg1 = TCG_REG_R0;
-            }
-            tcg_out_setcond_ne0(s, arg0, arg1);
-            return;
-        case TCG_COND_GE:
-            tcg_out32(s, NOR | SAB(arg1, arg0, arg1));
-            arg1 = arg0;
-            /* FALLTHRU */
-        case TCG_COND_LT:
-            /* Extract the sign bit.  */
-            tcg_out_rld(s, RLDICL, arg0, arg1,
-                        type == TCG_TYPE_I64 ? 1 : 33, 63);
-            return;
-        default:
-            break;
-        }
-    }
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0);
 
-    /* If we have ISEL, we can implement everything with 3 or 4 insns.
-       All other cases below are also at least 3 insns, so speed up the
-       code generator by not considering them and always using ISEL.  */
-    if (HAVE_ISEL) {
-        int isel, tab;
+    /* If the address needed to be zero-extended, we'll have already
+       placed it in R4.  The only remaining case is 64-bit guest.  */
+    tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg);
 
-        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index);
+    tcg_out32(s, MFSPR | RT(TCG_REG_R6) | LR);
 
-        isel = tcg_to_isel[cond];
+    tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
 
-        tcg_out_movi(s, type, arg0, 1);
-        if (isel & 1) {
-            /* arg0 = (bc ? 0 : 1) */
-            tab = TAB(arg0, 0, arg0);
-            isel &= ~1;
-        } else {
-            /* arg0 = (bc ? 1 : 0) */
-            tcg_out_movi(s, type, TCG_REG_R0, 0);
-            tab = TAB(arg0, arg0, TCG_REG_R0);
-        }
-        tcg_out32(s, isel | tab);
-        return;
+    if (opc & MO_SIGN) {
+        uint32_t insn = qemu_exts_opc[opc & MO_SIZE];
+        tcg_out32(s, insn | RA(lb->datalo_reg) | RS(TCG_REG_R3));
+    } else {
+        tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3);
     }
 
-    switch (cond) {
-    case TCG_COND_EQ:
-        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
-        tcg_out_setcond_eq0(s, type, arg0, arg1);
-        return;
+    tcg_out_b(s, 0, lb->raddr);
+}
 
-    case TCG_COND_NE:
-        arg1 = tcg_gen_setcond_xor(s, arg1, arg2, const_arg2);
-        /* Discard the high bits only once, rather than both inputs.  */
-        if (type == TCG_TYPE_I32) {
-            tcg_out_ext32u(s, TCG_REG_R0, arg1);
-            arg1 = TCG_REG_R0;
-        }
-        tcg_out_setcond_ne0(s, arg0, arg1);
-        return;
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{
+    TCGMemOp opc = lb->opc;
+    TCGMemOp s_bits = opc & MO_SIZE;
 
-    case TCG_COND_GT:
-    case TCG_COND_GTU:
-        sh = 30;
-        crop = 0;
-        goto crtest;
+    reloc_pc14(lb->label_ptr[0], s->code_ptr);
 
-    case TCG_COND_LT:
-    case TCG_COND_LTU:
-        sh = 29;
-        crop = 0;
-        goto crtest;
+    tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0);
 
-    case TCG_COND_GE:
-    case TCG_COND_GEU:
-        sh = 31;
-        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_LT) | BB(7, CR_LT);
-        goto crtest;
+    /* If the address needed to be zero-extended, we'll have already
+       placed it in R4.  The only remaining case is 64-bit guest.  */
+    tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg);
 
-    case TCG_COND_LE:
-    case TCG_COND_LEU:
-        sh = 31;
-        crop = CRNOR | BT(7, CR_EQ) | BA(7, CR_GT) | BB(7, CR_GT);
-    crtest:
-        tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
-        if (crop) {
-            tcg_out32(s, crop);
-        }
-        tcg_out32(s, MFOCRF | RT(TCG_REG_R0) | FXM(7));
-        tcg_out_rlw(s, RLWINM, arg0, TCG_REG_R0, sh, 31, 31);
-        break;
+    tcg_out_rld(s, RLDICL, TCG_REG_R5, lb->datalo_reg,
+                0, 64 - (1 << (3 + s_bits)));
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index);
+    tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR);
 
-    default:
-        tcg_abort();
-    }
+    tcg_out_call(s, qemu_st_helpers[opc]);
+
+    tcg_out_b(s, 0, lb->raddr);
 }
+#endif /* SOFTMMU */
 
-static void tcg_out_bc(TCGContext *s, int bc, int label_index)
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
+                            TCGMemOp opc, int mem_index)
 {
-    TCGLabel *l = &s->labels[label_index];
+    TCGReg rbase;
+    uint32_t insn;
+    TCGMemOp s_bits = opc & MO_SIZE;
+#ifdef CONFIG_SOFTMMU
+    tcg_insn_unit *label_ptr;
+#endif
 
-    if (l->has_value) {
-        tcg_out32(s, bc | reloc_pc14_val(s->code_ptr, l->u.value_ptr));
+#ifdef CONFIG_SOFTMMU
+    addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true);
+
+    /* Load a pointer into the current opcode w/conditional branch-link. */
+    label_ptr = s->code_ptr;
+    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
+
+    rbase = TCG_REG_R3;
+#else  /* !CONFIG_SOFTMMU */
+    rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+    if (TARGET_LONG_BITS == 32) {
+        tcg_out_ext32u(s, TCG_REG_R2, addr_reg);
+        addr_reg = TCG_REG_R2;
+    }
+#endif
+
+    insn = qemu_ldx_opc[opc];
+    if (!HAVE_ISA_2_06 && insn == LDBRX) {
+        tcg_out32(s, ADDI | TAI(TCG_REG_R0, addr_reg, 4));
+        tcg_out32(s, LWBRX | TAB(data_reg, rbase, addr_reg));
+        tcg_out32(s, LWBRX | TAB(TCG_REG_R0, rbase, TCG_REG_R0));
+        tcg_out_rld(s, RLDIMI, data_reg, TCG_REG_R0, 32, 0);
+    } else if (insn) {
+        tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg));
     } else {
-        tcg_out_reloc(s, s->code_ptr, R_PPC_REL14, label_index, 0);
-        tcg_out_bc_noaddr(s, bc);
+        insn = qemu_ldx_opc[opc & (MO_SIZE | MO_BSWAP)];
+        tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg));
+        insn = qemu_exts_opc[s_bits];
+        tcg_out32(s, insn | RA(data_reg) | RS(data_reg));
     }
-}
 
-static void tcg_out_brcond(TCGContext *s, TCGCond cond,
-                           TCGArg arg1, TCGArg arg2, int const_arg2,
-                           int label_index, TCGType type)
-{
-    tcg_out_cmp(s, cond, arg1, arg2, const_arg2, 7, type);
-    tcg_out_bc(s, tcg_to_bc[cond], label_index);
+#ifdef CONFIG_SOFTMMU
+    add_qemu_ldst_label(s, true, opc, data_reg, addr_reg, mem_index,
+                        s->code_ptr, label_ptr);
+#endif
 }
 
-static void tcg_out_movcond(TCGContext *s, TCGType type, TCGCond cond,
-                            TCGArg dest, TCGArg c1, TCGArg c2, TCGArg v1,
-                            TCGArg v2, bool const_c2)
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
+                            TCGMemOp opc, int mem_index)
 {
-    /* If for some reason both inputs are zero, don't produce bad code.  */
-    if (v1 == 0 && v2 == 0) {
-        tcg_out_movi(s, type, dest, 0);
-        return;
-    }
+    TCGReg rbase;
+    uint32_t insn;
+#ifdef CONFIG_SOFTMMU
+    tcg_insn_unit *label_ptr;
+#endif
 
-    tcg_out_cmp(s, cond, c1, c2, const_c2, 7, type);
+#ifdef CONFIG_SOFTMMU
+    addr_reg = tcg_out_tlb_read(s, opc & MO_SIZE, addr_reg, mem_index, false);
 
-    if (HAVE_ISEL) {
-        int isel = tcg_to_isel[cond];
+    /* Load a pointer into the current opcode w/conditional branch-link. */
+    label_ptr = s->code_ptr;
+    tcg_out_bc_noaddr(s, BC | BI(7, CR_EQ) | BO_COND_FALSE | LK);
 
-        /* Swap the V operands if the operation indicates inversion.  */
-        if (isel & 1) {
-            int t = v1;
-            v1 = v2;
-            v2 = t;
-            isel &= ~1;
-        }
-        /* V1 == 0 is handled by isel; V2 == 0 must be handled by hand.  */
-        if (v2 == 0) {
-            tcg_out_movi(s, type, TCG_REG_R0, 0);
-        }
-        tcg_out32(s, isel | TAB(dest, v1, v2));
+    rbase = TCG_REG_R3;
+#else  /* !CONFIG_SOFTMMU */
+    rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+    if (TARGET_LONG_BITS == 32) {
+        tcg_out_ext32u(s, TCG_REG_R2, addr_reg);
+        addr_reg = TCG_REG_R2;
+    }
+#endif
+
+    insn = qemu_stx_opc[opc];
+    if (!HAVE_ISA_2_06 && insn == STDBRX) {
+        tcg_out32(s, STWBRX | SAB(data_reg, rbase, addr_reg));
+        tcg_out32(s, ADDI | TAI(TCG_REG_R2, addr_reg, 4));
+        tcg_out_shri64(s, TCG_REG_R0, data_reg, 32);
+        tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, TCG_REG_R2));
     } else {
-        if (dest == v2) {
-            cond = tcg_invert_cond(cond);
-            v2 = v1;
-        } else if (dest != v1) {
-            if (v1 == 0) {
-                tcg_out_movi(s, type, dest, 0);
-            } else {
-                tcg_out_mov(s, type, dest, v1);
-            }
-        }
-        /* Branch forward over one insn */
-        tcg_out32(s, tcg_to_bc[cond] | 8);
-        if (v2 == 0) {
-            tcg_out_movi(s, type, dest, 0);
-        } else {
-            tcg_out_mov(s, type, dest, v2);
-        }
+        tcg_out32(s, insn | SAB(data_reg, rbase, addr_reg));
     }
+
+#ifdef CONFIG_SOFTMMU
+    add_qemu_ldst_label(s, false, opc, data_reg, addr_reg, mem_index,
+                        s->code_ptr, label_ptr);
+#endif
 }
 
-void ppc_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
+#define FRAME_SIZE ((int) \
+    ((8                     /* back chain */              \
+      + 8                   /* CR */                      \
+      + 8                   /* LR */                      \
+      + 8                   /* compiler doubleword */     \
+      + 8                   /* link editor doubleword */  \
+      + 8                   /* TOC save area */           \
+      + TCG_STATIC_CALL_ARGS_SIZE                         \
+      + CPU_TEMP_BUF_NLONGS * sizeof(long)                \
+      + ARRAY_SIZE(tcg_target_callee_save_regs) * 8       \
+      + 15) & ~15))
+
+#define REG_SAVE_BOT (FRAME_SIZE - ARRAY_SIZE(tcg_target_callee_save_regs) * 8)
+
+static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    TCGContext s;
+    int i;
 
-    s.code_buf = s.code_ptr = (tcg_insn_unit *)jmp_addr;
-    tcg_out_b(&s, 0, (tcg_insn_unit *)addr);
-    flush_icache_range(jmp_addr, jmp_addr + tcg_current_code_size(&s));
+    tcg_set_frame(s, TCG_REG_CALL_STACK,
+                  REG_SAVE_BOT - CPU_TEMP_BUF_NLONGS * sizeof(long),
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
+
+#ifndef __APPLE__
+    /* First emit adhoc function descriptor */
+    tcg_out64(s, (uint64_t)s->code_ptr + 24); /* entry point */
+    tcg_out64(s, 0);                          /* toc */
+    tcg_out64(s, 0);                          /* environment pointer */
+#endif
+
+    /* Prologue */
+    tcg_out32(s, MFSPR | RT(TCG_REG_R0) | LR);
+    tcg_out32(s, STDU | SAI(TCG_REG_R1, TCG_REG_R1, -FRAME_SIZE));
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
+        tcg_out32(s, STD | SAI(tcg_target_callee_save_regs[i], 1, 
+                               REG_SAVE_BOT + i * 8));
+    }
+    tcg_out32(s, STD | SAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16));
+
+#ifdef CONFIG_USE_GUEST_BASE
+    if (GUEST_BASE) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
+#endif
+
+    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+    tcg_out32(s, MTSPR | RS(tcg_target_call_iarg_regs[1]) | CTR);
+    tcg_out32(s, BCCTR | BO_ALWAYS);
+
+    /* Epilogue */
+    tb_ret_addr = s->code_ptr;
+
+    for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); ++i) {
+        tcg_out32(s, LD | TAI(tcg_target_callee_save_regs[i], TCG_REG_R1,
+                              REG_SAVE_BOT + i * 8));
+    }
+    tcg_out32(s, LD | TAI(TCG_REG_R0, TCG_REG_R1, FRAME_SIZE + 16));
+    tcg_out32(s, MTSPR | RS(TCG_REG_R0) | LR);
+    tcg_out32(s, ADDI | TAI(TCG_REG_R1, TCG_REG_R1, FRAME_SIZE));
+    tcg_out32(s, BCLR | BO_ALWAYS);
 }
 
 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
-- 
1.9.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]