From: Richard Henderson
Subject: [Qemu-devel] [PATCH 3/3] tci: Rewrite from scratch
Date: Tue, 13 May 2014 14:22:05 -0700

Use aligned 32-bit insn units, so that we have no unaligned accesses
when interpreting the bytecode stream.  Encode bytecodes more efficiently.
Use libffi for invoking the handlers.  Provide a meaningful disassembler.

Signed-off-by: Richard Henderson <address@hidden>
---
 disas/tci.c             |  415 ++++++++++-
 include/exec/cpu-all.h  |    6 +-
 include/exec/exec-all.h |    5 +-
 include/qemu/tci.h      |  171 +++++
 tcg/tcg.c               |   54 +-
 tcg/tci/README          |  113 ++-
 tcg/tci/tcg-target.c    | 1303 +++++++++++++++--------------------
 tcg/tci/tcg-target.h    |  156 ++---
 tci.c                   | 1752 ++++++++++++++++-------------------------------
 translate-all.c         |    5 +-
 10 files changed, 1852 insertions(+), 2128 deletions(-)
 create mode 100644 include/qemu/tci.h

diff --git a/disas/tci.c b/disas/tci.c
index a606b63..8b50b03 100644
--- a/disas/tci.c
+++ b/disas/tci.c
@@ -19,41 +19,400 @@
 
 #include "disas/bfd.h"
 #include "tcg/tcg.h"
+#include "qemu/tci.h"
 
-/* Disassemble TCI bytecode. */
-int print_insn_tci(bfd_vma addr, disassemble_info *info)
+
+#define O(OPC)  [TCI_##OPC] = #OPC
+
+static const char * const opcode_name[1 << LEN_OP] = {
+    O(invalid),
+    O(add),
+    O(sub),
+    O(mul),
+    O(divu),
+    O(remu),
+    O(divs),
+    O(rems),
+    O(and),
+    O(ior),
+    O(xor),
+    O(andc),
+    O(iorc),
+    O(xorc),
+    O(nand),
+    O(nior),
+    O(shl),
+    O(shr4),
+    O(sar4),
+    O(rol4),
+    O(ror4),
+    O(shr8),
+    O(sar8),
+    O(rol8),
+    O(ror8),
+    O(cmp4eq),
+    O(cmp4ne),
+    O(cmp4lt),
+    O(cmp4le),
+    O(cmp4gt),
+    O(cmp4ge),
+    O(cmp4ltu),
+    O(cmp4leu),
+    O(cmp4gtu),
+    O(cmp4geu),
+    O(cmp8eq),
+    O(cmp8ne),
+    O(cmp8lt),
+    O(cmp8le),
+    O(cmp8gt),
+    O(cmp8ge),
+    O(cmp8ltu),
+    O(cmp8leu),
+    O(cmp8gtu),
+    O(cmp8geu),
+    O(qst1),
+    O(qst2_le),
+    O(qst2_be),
+    O(qst4_le),
+    O(qst4_be),
+    O(qst8_le),
+    O(qst8_be),
+    O(movc),
+    O(deposit),
+    O(concat4),
+    O(sxt1),
+    O(sxt2),
+    O(sxt4),
+    O(zxt1),
+    O(zxt2),
+    O(zxt4),
+    O(bswap2),
+    O(bswap4),
+    O(bswap8),
+    O(qld1u),
+    O(qld1s),
+    O(qld2u_le),
+    O(qld2u_be),
+    O(qld2s_le),
+    O(qld2s_be),
+    O(qld4u_le),
+    O(qld4u_be),
+    O(qld4s_le),
+    O(qld4s_be),
+    O(qld8_le),
+    O(qld8_be),
+    O(b),
+    O(bc),
+    O(exit),
+    O(call0),
+    O(call4),
+    O(call8),
+    O(setc),
+    O(st1),
+    O(st2),
+    O(st4),
+    O(st8),
+    O(ld1u),
+    O(ld1s),
+    O(ld2u),
+    O(ld2s),
+    O(ld4u),
+    O(ld4s),
+    O(ld8),
+    O(cmppeq),
+    O(cmppne),
+    O(cmpplt),
+    O(cmpple),
+    O(cmppgt),
+    O(cmppge),
+    O(cmppltu),
+    O(cmppleu),
+    O(cmppgtu),
+    O(cmppgeu),
+    O(add2),
+    O(sub2),
+    O(mulu2),
+    O(muls2),
+};
+
+#undef O
+
+static char const reg_names[8][4] = {
+    "a",  "b",  "c",  "d", "e",  "f", "vp", "sp",
+};
+
+static inline const char *get_r(int n)
 {
-    int length;
-    uint8_t byte;
-    int status;
-    TCGOpcode op;
-
-    status = info->read_memory_func(addr, &byte, 1, info);
-    if (status != 0) {
-        info->memory_error_func(status, addr, info);
-        return -1;
+    assert(n < 8);
+    return &reg_names[n][0];
+}
+
+static inline uint64_t concat4(uint32_t xv, uint32_t yv)
+{
+    return (uint64_t)xv << 32 | yv;
+}
+
+static inline void *concatp(int32_t xv, int32_t yv)
+{
+    if (sizeof(void *) == 8) {
+        return (void *)(uintptr_t)concat4(xv, yv);
     }
-    op = byte;
+    return (void *)(uintptr_t)yv;
+}
+
+/* Disassemble TCI bytecode. */
+int print_insn_tci(bfd_vma baddr, disassemble_info *info)
+{
+    /* These are host addresses.  Dispense with the indirection.  */
+    uint32_t *addr = (uint32_t *)(uintptr_t)baddr;
+    uint32_t *orig_addr = addr;
+    char buf1[16], buf2[16];
+    const char *name, *r, *w, *x, *y;
+    int ri, wi, xi, yi;
+    int32_t xv, yv;
+    uint32_t insn;
+    TCIOp opc;
+
+    insn = *addr++;
+
+    opc = extract32(insn, POS_OP, LEN_OP);
+    ri  = extract32(insn, POS_R, LEN_R);
+    wi  = extract32(insn, POS_W, LEN_W);
+    xi  = extract32(insn, POS_X, LEN_X);
+    yi  = extract32(insn, POS_Y, LEN_Y);
+
+    name = opcode_name[opc];
+    r = reg_names[ri];
+    w = reg_names[wi];
 
-    addr++;
-    status = info->read_memory_func(addr, &byte, 1, info);
-    if (status != 0) {
-        info->memory_error_func(status, addr, info);
-        return -1;
+    if (xi < 8) {
+        x = reg_names[xi];
+        xv = 0xdeadbeef;
+    } else {
+        if (xi == 8) {
+            xv = (int32_t)*addr++;
+        } else {
+            xv = xi - BIAS_X;
+        }
+        snprintf(buf1, sizeof(buf1), "%d", xv);
+        x = buf1;
     }
-    length = byte;
 
-    if (op >= tcg_op_defs_max) {
-        info->fprintf_func(info->stream, "illegal opcode %d", op);
+    if (yi < 8) {
+        y = reg_names[yi];
+        yv = 0xdeadbeef;
     } else {
-        const TCGOpDef *def = &tcg_op_defs[op];
-        int nb_oargs = def->nb_oargs;
-        int nb_iargs = def->nb_iargs;
-        int nb_cargs = def->nb_cargs;
-        /* TODO: Improve disassembler output. */
-        info->fprintf_func(info->stream, "%s\to=%d i=%d c=%d",
-                           def->name, nb_oargs, nb_iargs, nb_cargs);
+        if (yi == 8) {
+            yv = (int32_t)*addr++;
+        } else {
+            yv = yi - BIAS_Y;
+        }
+        snprintf(buf2, sizeof(buf2), "%d", yv);
+        y = buf2;
+    }
+
+    switch (opc) {
+    case TCI_cmp4eq:
+    case TCI_cmp4ne:
+    case TCI_cmp4lt:
+    case TCI_cmp4le:
+    case TCI_cmp4gt:
+    case TCI_cmp4ge:
+    case TCI_cmp4ltu:
+    case TCI_cmp4leu:
+    case TCI_cmp4gtu:
+    case TCI_cmp4geu:
+    case TCI_cmp8eq:
+    case TCI_cmp8ne:
+    case TCI_cmp8lt:
+    case TCI_cmp8le:
+    case TCI_cmp8gt:
+    case TCI_cmp8ge:
+    case TCI_cmp8ltu:
+    case TCI_cmp8leu:
+    case TCI_cmp8gtu:
+    case TCI_cmp8geu:
+        info->fprintf_func(info->stream, "%-10s%s,%s", name, x, y);
+        break;
+
+    case TCI_qst8_le:
+    case TCI_qst8_be:
+        if (TCG_TARGET_REG_BITS == 32) {
+            info->fprintf_func(info->stream, "%-10s[%s,%s]=%s:%s",
+                               name, w, y, r, x);
+            break;
+        }
+        /* FALLTHRU */
+    case TCI_qst1:
+    case TCI_qst2_le:
+    case TCI_qst2_be:
+    case TCI_qst4_le:
+    case TCI_qst4_be:
+        info->fprintf_func(info->stream, "%-10s[%s,%s]=%s", name, w, y, x);
+        break;
+
+    case TCI_qld8_le:
+    case TCI_qld8_be:
+        if (TCG_TARGET_REG_BITS == 32) {
+            info->fprintf_func(info->stream, "%-10s%s:%s=[%s,%s]",
+                               name, w, r, x, y);
+            break;
+        }
+        /* FALLTHRU */
+    case TCI_qld1u:
+    case TCI_qld1s:
+    case TCI_qld2u_le:
+    case TCI_qld2u_be:
+    case TCI_qld2s_le:
+    case TCI_qld2s_be:
+    case TCI_qld4u_le:
+    case TCI_qld4u_be:
+    case TCI_qld4s_le:
+    case TCI_qld4s_be:
+        info->fprintf_func(info->stream, "%-10s%s=[%s,%s]", name, r, x, y);
+        break;
+
+    case TCI_deposit:
+        assert(yi >= 8);
+        info->fprintf_func(info->stream, "%-10s%s=%s,%d,%d,%s",
+                           name, r, w, yv >> 6, yv & 0x3f, x);
+        break;
+
+    case TCI_concat4:
+        if (xi >= 8 && yi >= 8) {
+            /* Special case R = X:Y as MOV.  */
+            info->fprintf_func(info->stream, "%-10s%s=0x%016" PRIx64,
+                               "mov", r, concat4(xv, yv));
+        } else {
+            info->fprintf_func(info->stream, "%-10s%s=%s:%s", name, r, x, y);
+        }
+        break;
+
+    case TCI_ior:
+        if (xi == BIAS_X) {
+            /* Special case R = 0 | Y as MOV.  */
+            info->fprintf_func(info->stream, "%-10s%s=%s", "mov", r, y);
+            break;
+        }
+        /* FALLTHRU */
+    case TCI_add:
+    case TCI_sub:
+    case TCI_mul:
+    case TCI_divu:
+    case TCI_remu:
+    case TCI_divs:
+    case TCI_rems:
+    case TCI_and:
+    case TCI_xor:
+    case TCI_andc:
+    case TCI_iorc:
+    case TCI_xorc:
+    case TCI_nand:
+    case TCI_nior:
+    case TCI_shl:
+    case TCI_shr4:
+    case TCI_sar4:
+    case TCI_rol4:
+    case TCI_ror4:
+    case TCI_shr8:
+    case TCI_sar8:
+    case TCI_rol8:
+    case TCI_ror8:
+    case TCI_movc:
+        info->fprintf_func(info->stream, "%-10s%s=%s,%s", name, r, x, y);
+        break;
+
+    case TCI_sxt1:
+    case TCI_sxt2:
+    case TCI_sxt4:
+    case TCI_zxt1:
+    case TCI_zxt2:
+    case TCI_zxt4:
+    case TCI_bswap2:
+    case TCI_bswap4:
+    case TCI_bswap8:
+        info->fprintf_func(info->stream, "%-10s%s=%s", name, r, y);
+        break;
+
+    case TCI_b:
+        /* Special case B with Y extension word as GOTO.  */
+        if (yi == 8) {
+            name = "goto";
+        }
+        /* FALLTHRU */
+    case TCI_bc:
+        assert(yi >= 8);
+        info->fprintf_func(info->stream, "%-10s%p", name, addr + yv);
+        break;
+
+    case TCI_exit:
+    case TCI_call0:
+        assert(xi >= 8 && yi >= 8);
+        info->fprintf_func(info->stream, "%-10s%p", name, concatp(xv, yv));
+        break;
+    case TCI_call8:
+        assert(xi >= 8 && yi >= 8);
+        if (TCG_TARGET_REG_BITS == 32) {
+            info->fprintf_func(info->stream, "%-10s%s:%s=%p",
+                               name, w, r, concatp(xv, yv));
+            break;
+        }
+        /* FALLTHRU */
+    case TCI_call4:
+        assert(xi >= 8 && yi >= 8);
+        info->fprintf_func(info->stream, "%-10s%s=%p",
+                           name, r, concatp(xv, yv));
+        break;
+
+    case TCI_setc:
+        info->fprintf_func(info->stream, "%-10s%s", name, r);
+        break;
+
+    case TCI_st1:
+    case TCI_st2:
+    case TCI_st4:
+    case TCI_st8:
+        info->fprintf_func(info->stream, "%-10s[%s+%s]=%s", name, w, y, x);
+        break;
+
+    case TCI_ld1u:
+    case TCI_ld1s:
+    case TCI_ld2u:
+    case TCI_ld2s:
+    case TCI_ld4u:
+    case TCI_ld4s:
+    case TCI_ld8:
+        info->fprintf_func(info->stream, "%-10s%s=[%s+%s]", name, r, w, y);
+        break;
+
+    case TCI_cmppeq:
+    case TCI_cmppne:
+    case TCI_cmpplt:
+    case TCI_cmpple:
+    case TCI_cmppgt:
+    case TCI_cmppge:
+    case TCI_cmppltu:
+    case TCI_cmppleu:
+    case TCI_cmppgtu:
+    case TCI_cmppgeu:
+        info->fprintf_func(info->stream, "%-10s%s:%s,%s:%s", name, w, r, x, y);
+        break;
+
+    case TCI_mulu2:
+    case TCI_muls2:
+        info->fprintf_func(info->stream, "%-10s%s:%s=%s,%s", name, w, r, x, y);
+        break;
+
+    case TCI_add2:
+    case TCI_sub2:
+        info->fprintf_func(info->stream, "%-10s%s:%s=%s:%s,%s:%s",
+                           name, w, r, w, r, x, y);
+        break;
+
+    case TCI_invalid:
+    default:
+        info->fprintf_func(info->stream, "illegal opcode %d", opc);
     }
 
-    return length;
+    return (addr - orig_addr) * (sizeof(*addr));
 }
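
For reference, the format strings above produce output along these
lines (hand-assembled examples, not actual disassembler output):

    add       a=b,c
    mov       a=-32
    ld4s      a=[vp+8]
    cmp4ltu   b,c
    setc      a
    goto      0x7f1234005678

Each line is one insn; "mov" is the special-cased printing of ior
with a zero X operand, and "goto" is B with a Y extension word.
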
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index fb649a4..424c0b6 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -199,20 +199,20 @@ extern unsigned long reserved_va;
 #endif
 
 /* All direct uses of g2h and h2g need to go away for usermode softmmu.  */
-#define g2h(x) ((void *)((unsigned long)(target_ulong)(x) + GUEST_BASE))
+#define g2h(x) ((void *)(uintptr_t)(x) + GUEST_BASE)
 
 #if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS
 #define h2g_valid(x) 1
 #else
 #define h2g_valid(x) ({ \
-    unsigned long __guest = (unsigned long)(x) - GUEST_BASE; \
+    uintptr_t __guest = (uintptr_t)(x) - GUEST_BASE; \
     (__guest < (1ul << TARGET_VIRT_ADDR_SPACE_BITS)) && \
     (!RESERVED_VA || (__guest < RESERVED_VA)); \
 })
 #endif
 
 #define h2g_nocheck(x) ({ \
-    unsigned long __ret = (unsigned long)(x) - GUEST_BASE; \
+    uintptr_t __ret = (uintptr_t)(x) - GUEST_BASE; \
     (abi_ulong)__ret; \
 })
 
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 0766e24..d2bc093 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -218,8 +218,9 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
 #if defined(CONFIG_TCG_INTERPRETER)
 static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
 {
-    /* patch the branch destination */
-    *(uint32_t *)jmp_addr = addr - (jmp_addr + 4);
+    intptr_t disp = (intptr_t)(addr - jmp_addr - 4) / 4;
+    assert(disp == (int32_t)disp);
+    *(int32_t *)jmp_addr = disp;
     /* no need to flush icache explicitly */
 }
 #elif defined(_ARCH_PPC)
diff --git a/include/qemu/tci.h b/include/qemu/tci.h
new file mode 100644
index 0000000..2fb1ab6
--- /dev/null
+++ b/include/qemu/tci.h
@@ -0,0 +1,171 @@
+#ifndef QEMU_TCI_H
+#define QEMU_TCI_H
+
+typedef enum {
+    TCI_invalid,
+
+    /* Binary opcodes.  */
+
+    TCI_add,
+    TCI_sub,
+    TCI_mul,
+    TCI_divu,
+    TCI_remu,
+    TCI_divs,
+    TCI_rems,
+    TCI_and,
+    TCI_ior,
+    TCI_xor,
+    TCI_andc,
+    TCI_iorc,
+    TCI_xorc,
+    TCI_nand,
+    TCI_nior,
+    TCI_shl,
+    TCI_shr4,
+    TCI_sar4,
+    TCI_rol4,
+    TCI_ror4,
+    TCI_shr8,
+    TCI_sar8,
+    TCI_rol8,
+    TCI_ror8,
+
+    TCI_cmp4eq,
+    TCI_cmp4ne,
+    TCI_cmp4lt,
+    TCI_cmp4le,
+    TCI_cmp4gt,
+    TCI_cmp4ge,
+    TCI_cmp4ltu,
+    TCI_cmp4leu,
+    TCI_cmp4gtu,
+    TCI_cmp4geu,
+
+    TCI_cmp8eq,
+    TCI_cmp8ne,
+    TCI_cmp8lt,
+    TCI_cmp8le,
+    TCI_cmp8gt,
+    TCI_cmp8ge,
+    TCI_cmp8ltu,
+    TCI_cmp8leu,
+    TCI_cmp8gtu,
+    TCI_cmp8geu,
+
+    TCI_movc,
+    TCI_concat4,
+    TCI_LAST_BINARY_OPC = TCI_concat4,
+
+    /* Unary opcodes.  */
+
+    TCI_sxt1,
+    TCI_sxt2,
+    TCI_sxt4,
+    TCI_zxt1,
+    TCI_zxt2,
+    TCI_zxt4,
+    TCI_bswap2,
+    TCI_bswap4,
+    TCI_bswap8,
+    TCI_LAST_UNARY_OPC = TCI_bswap8,
+
+    /* Control flow opcodes.  */
+
+    TCI_b,
+    TCI_bc,
+    TCI_exit,
+    TCI_call0,
+    TCI_call4,
+    TCI_call8,
+
+    /* Qemu load/store operations.  */
+
+    TCI_qst1,
+    TCI_qst2_le,
+    TCI_qst2_be,
+    TCI_qst4_le,
+    TCI_qst4_be,
+    TCI_qst8_le,
+    TCI_qst8_be,
+
+    TCI_qld1u,
+    TCI_qld1s,
+    TCI_qld2u_le,
+    TCI_qld2u_be,
+    TCI_qld2s_le,
+    TCI_qld2s_be,
+    TCI_qld4u_le,
+    TCI_qld4u_be,
+    TCI_qld4s_le,
+    TCI_qld4s_be,
+    TCI_qld8_le,
+    TCI_qld8_be,
+
+    /* Load and store opcodes.  */
+
+    TCI_st1,
+    TCI_st2,
+    TCI_st4,
+    TCI_st8,
+
+    TCI_ld1u,
+    TCI_ld1s,
+    TCI_ld2u,
+    TCI_ld2s,
+    TCI_ld4u,
+    TCI_ld4s,
+    TCI_ld8,
+
+    /* Zero-ary opcode.  */
+
+    TCI_setc,
+
+    /* 3 and 4-operand opcodes.  */
+
+    TCI_cmppeq,
+    TCI_cmppne,
+    TCI_cmpplt,
+    TCI_cmpple,
+    TCI_cmppgt,
+    TCI_cmppge,
+    TCI_cmppltu,
+    TCI_cmppleu,
+    TCI_cmppgtu,
+    TCI_cmppgeu,
+
+    TCI_add2,
+    TCI_sub2,
+    TCI_mulu2,
+    TCI_muls2,
+
+    TCI_deposit,
+
+    TCI_NUM_OPC
+} TCIOp;
+
+#define LEN_Y   14
+#define LEN_X   5
+#define LEN_W   3
+#define LEN_R   3
+#define LEN_OP  7
+
+#define POS_Y   0
+#define POS_X   LEN_Y
+#define POS_W   (POS_X + LEN_X)
+#define POS_R   (POS_W + LEN_W)
+#define POS_OP  (POS_R + LEN_R)
+
+#define BIAS_Y  (1 << (LEN_Y - 1))
+#define BIAS_X  (1 << (LEN_X - 1))
+
+#define MAX_Y   ((1 << LEN_Y) - 1 - BIAS_Y)
+#define MAX_X   ((1 << LEN_X) - 1 - BIAS_X)
+
+#define MIN_Y   (9 - BIAS_Y)
+#define MIN_X   (9 - BIAS_X)
+
+QEMU_BUILD_BUG_ON(POS_OP + LEN_OP != 32);
+QEMU_BUILD_BUG_ON(TCI_NUM_OPC > (1 << LEN_OP));
+
+#endif /* QEMU_TCI_H */
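
For reference, the field widths above work out to these operand ranges:

    LEN_X = 5   ->  BIAS_X = 1 << 4  = 16
                    MIN_X  = 9 - 16              = -7
                    MAX_X  = (1 << 5) - 1 - 16   = 15
    LEN_Y = 14  ->  BIAS_Y = 1 << 13 = 8192
                    MIN_Y  = 9 - 8192            = -8183
                    MAX_Y  = (1 << 14) - 1 - 8192 = 8191

Field values 0-7 name a register and 8 selects the 32-bit extension
word, which is why the immediate range begins at field value 9.
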
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 11d1996..64af273 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -111,6 +111,15 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 static void tcg_out_tb_init(TCGContext *s);
 static void tcg_out_tb_finalize(TCGContext *s);
 
+typedef struct TCGHelperInfo {
+    void *func;
+#ifdef CONFIG_TCG_INTERPRETER
+    ffi_cif *cif;
+#endif
+    const char *name;
+    unsigned flags;
+    unsigned sizemask;
+} TCGHelperInfo;
 
 TCGOpDef tcg_op_defs[] = {
 #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
@@ -311,16 +320,6 @@ void tcg_pool_reset(TCGContext *s)
     s->pool_current = NULL;
 }
 
-typedef struct TCGHelperInfo {
-    void *func;
-#ifdef CONFIG_TCG_INTERPRETER
-    ffi_cif *cif;
-#endif
-    const char *name;
-    unsigned flags;
-    unsigned sizemask;
-} TCGHelperInfo;
-
 #include "exec/helper-proto.h"
 
 #ifdef CONFIG_TCG_INTERPRETER
@@ -372,6 +371,15 @@ void tcg_context_init(TCGContext *s)
                             (gpointer)&all_helpers[i]);
     }
 
+#ifdef CONFIG_TCG_INTERPRETER
+    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
+        ffi_cif *cif = all_helpers[i].cif;
+        ffi_status ok = ffi_prep_cif(cif, FFI_DEFAULT_ABI, cif->nargs,
+                                     cif->rtype, cif->arg_types);
+        tcg_debug_assert(ok == FFI_OK);
+    }
+#endif
+
     tcg_target_init(s);
 }
 
@@ -796,17 +804,27 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
     } else {
         nb_rets = 0;
     }
+
     real_args = 0;
     for (i = 0; i < nargs; i++) {
-        int is_64bit = sizemask & (1 << (i+1)*2);
-        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
-#ifdef TCG_TARGET_CALL_ALIGN_ARGS
-            /* some targets want aligned 64 bit args */
-            if (real_args & 1) {
-                *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
-                real_args++;
-            }
+        bool is_64bit = sizemask & (1 << (i+1)*2);
+        bool want_align = false;
+
+#if defined(CONFIG_TCG_INTERPRETER)
+        /* Align all arguments, so that they land in predictable places
+           for passing off to ffi_call.  */
+        want_align = true;
+#elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
+        /* Some targets want aligned 64 bit args.  */
+        want_align = is_64bit;
 #endif
+
+        if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
+            *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
+            real_args++;
+        }
+
+        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
diff --git a/tcg/tci/README b/tcg/tci/README
index dc57f07..ce26194 100644
--- a/tcg/tci/README
+++ b/tcg/tci/README
@@ -18,17 +18,7 @@ support (almost) any host.
 
 This is what TCI (Tiny Code Interpreter) does.
 
-2) Implementation
-
-Like each TCG host frontend, TCI implements the code generator in
-tcg-target.c, tcg-target.h. Both files are in directory tcg/tci.
-
-The additional file tcg/tci.c adds the interpreter.
-
-The bytecode consists of opcodes (same numeric values as those used by
-TCG), command length and arguments of variable size and number.
-
-3) Usage
+2) Usage
 
 For hosts without native TCG, the interpreter TCI must be enabled by
 
@@ -66,65 +56,44 @@ Hosts with native TCG can also enable TCI by claiming to be unsupported:
 configure then no longer uses the native linker script (*.ld) for
 user mode emulation.
 
+3) Implementation
+
+Like each TCG host frontend, TCI implements the code generator in
+tcg-target.c, tcg-target.h. Both files are in directory tcg/tci.
+
+The additional file tcg/tci.c adds the interpreter.
 
-4) Status
-
-TCI needs special implementation for 32 and 64 bit host, 32 and 64 bit target,
-host and target with same or different endianness.
-
-            | host (le)                     host (be)
-            | 32             64             32             64
-------------+------------------------------------------------------------
-target (le) | s0, u0         s1, u1         s?, u?         s?, u?
-32 bit      |
-            |
-target (le) | sc, uc         s1, u1         s?, u?         s?, u?
-64 bit      |
-            |
-target (be) | sc, u0         sc, uc         s?, u?         s?, u?
-32 bit      |
-            |
-target (be) | sc, uc         sc, uc         s?, u?         s?, u?
-64 bit      |
-            |
-
-System emulation
-s? = untested
-sc = compiles
-s0 = bios works
-s1 = grub works
-s2 = Linux boots
-
-Linux user mode emulation
-u? = untested
-uc = compiles
-u0 = static hello works
-u1 = linux-user-test works
-
-5) Todo list
-
-* TCI is not widely tested. It was written and tested on a x86_64 host
-  running i386 and x86_64 system emulation and Linux user mode.
-  A cross compiled QEMU for i386 host also works with the same basic tests.
-  A cross compiled QEMU for mipsel host works, too. It is terribly slow
-  because I run it in a mips malta emulation, so it is an interpreted
-  emulation in an emulation.
-  A cross compiled QEMU for arm host works (tested with pc bios).
-  A cross compiled QEMU for ppc host works at least partially:
-  i386-linux-user/qemu-i386 can run a simple hello-world program
-  (tested in a ppc emulation).
-
-* Some TCG opcodes are either missing in the code generator and/or
-  in the interpreter. These opcodes raise a runtime exception, so it is
-  possible to see where code must be added.
-
-* The pseudo code is not optimized and still ugly. For hosts with special
-  alignment requirements, it needs some fixes (maybe aligned bytecode
-  would also improve speed for hosts which support byte alignment).
-
-* A better disassembler for the pseudo code would be nice (a very primitive
-  disassembler is included in tcg-target.c).
-
-* It might be useful to have a runtime option which selects the native TCG
-  or TCI, so QEMU would have to include two TCGs. Today, selecting TCI
-  is a configure option, so you need two compilations of QEMU.
+The bytecode consists of opcodes from include/qemu/tci.h, and a
+set of operands:
+
+   31    25  22  19    15               0
+  +--------+---+---+-----+---------------+
+  |   op   | r | w |  x  |       y       |
+  +--------+---+---+-----+---------------+
+
+  Y: Ldst offset, deposit location, binary 2nd argument,
+     branch displacement, qemu memindex, low part input.
+  X: Low part store source, deposit field, binary 1st argument,
+     high part input
+  W: Ldst base, deposit source, high part output
+  R: Low part output, high part store source
+
+For both X and Y, values less than 8 refer to a register operand, and
+value 8 refers to a follow-on 32-bit signed operand (X coming before Y).
+For values larger than 8, the value is biased by 16 and 8192 (BIAS_X
+and BIAS_Y) respectively.  Thus X can immediately encode values -7 to 15,
+and Y values -8183 to 8191; both have escapes to a full 32-bit signed
+operand (see the decoding sketch below).
+
+Design considerations:
+*  One input (Y) should be able to directly encode most offsets to env.
+   This is a range of around -8 < y < 1000.
+*  One input (X) should be able to directly encode 0 (sub, deposit)
+   and -1 (deposit), as those are common operations.
+*  There are several opcodes that need to be able to take 64-bit values,
+   particularly on LP64 hosts.
+*  Having 2 inputs that can encode arbitrary 32-bit values suggests a
+   single 64-bit value can be encoded as a faux binary operation.
+*  The X and Y operands should always be decodable, even when unused.
+   Thus the literal 0 is usually stored when the input is unused.
+*  Using X as the store (low part) source, as opposed to R or W, means
+   that 32-bit constants can be stored directly.
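
A minimal decoding sketch for the operand encoding, using extract32()
and the field macros from include/qemu/tci.h (illustrative only, not
part of the patch):

    /* Sketch: decode the Y operand of one insn word.  *pc points just
       past the insn; it advances over the extension word if present.
       Returns true if Y names a register, false for an immediate.  */
    static bool decode_y(uint32_t insn, const uint32_t **pc,
                         unsigned *reg, int32_t *imm)
    {
        unsigned yi = extract32(insn, POS_Y, LEN_Y);

        if (yi < 8) {
            *reg = yi;                    /* register a..f, vp, sp */
            return true;
        }
        if (yi == 8) {
            *imm = (int32_t)*(*pc)++;     /* follow-on 32-bit word */
        } else {
            *imm = (int32_t)yi - BIAS_Y;  /* biased small immediate */
        }
        return false;
    }

X decodes identically with POS_X, LEN_X and BIAS_X; when both operands
use extension words, X's word comes first, matching the disassembler.
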
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index 9b39231..860fc7a 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -23,371 +23,51 @@
  */
 
 #include "tcg-be-null.h"
-
-/* TODO list:
- * - See TODO comments in code.
- */
-
-/* Marker for missing code. */
-#define TODO() \
-    do { \
-        fprintf(stderr, "TODO %s:%u: %s()\n", \
-                __FILE__, __LINE__, __func__); \
-        tcg_abort(); \
-    } while (0)
-
-/* Bitfield n...m (in 32 bit value). */
-#define BITS(n, m) (((0xffffffffU << (31 - n)) >> (31 - n + m)) << m)
-
-/* Macros used in tcg_target_op_defs. */
-#define R       "r"
-#define RI      "ri"
-#if TCG_TARGET_REG_BITS == 32
-# define R64    "r", "r"
-#else
-# define R64    "r"
-#endif
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-# define L      "L", "L"
-# define S      "S", "S"
-#else
-# define L      "L"
-# define S      "S"
-#endif
-
-/* TODO: documentation. */
-static const TCGTargetOpDef tcg_target_op_defs[] = {
-    { INDEX_op_exit_tb, { NULL } },
-    { INDEX_op_goto_tb, { NULL } },
-    { INDEX_op_br, { NULL } },
-
-    { INDEX_op_ld8u_i32, { R, R } },
-    { INDEX_op_ld8s_i32, { R, R } },
-    { INDEX_op_ld16u_i32, { R, R } },
-    { INDEX_op_ld16s_i32, { R, R } },
-    { INDEX_op_ld_i32, { R, R } },
-    { INDEX_op_st8_i32, { R, R } },
-    { INDEX_op_st16_i32, { R, R } },
-    { INDEX_op_st_i32, { R, R } },
-
-    { INDEX_op_add_i32, { R, RI, RI } },
-    { INDEX_op_sub_i32, { R, RI, RI } },
-    { INDEX_op_mul_i32, { R, RI, RI } },
-#if TCG_TARGET_HAS_div_i32
-    { INDEX_op_div_i32, { R, R, R } },
-    { INDEX_op_divu_i32, { R, R, R } },
-    { INDEX_op_rem_i32, { R, R, R } },
-    { INDEX_op_remu_i32, { R, R, R } },
-#elif TCG_TARGET_HAS_div2_i32
-    { INDEX_op_div2_i32, { R, R, "0", "1", R } },
-    { INDEX_op_divu2_i32, { R, R, "0", "1", R } },
-#endif
-    /* TODO: Does R, RI, RI result in faster code than R, R, RI?
-       If both operands are constants, we can optimize. */
-    { INDEX_op_and_i32, { R, RI, RI } },
-#if TCG_TARGET_HAS_andc_i32
-    { INDEX_op_andc_i32, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_eqv_i32
-    { INDEX_op_eqv_i32, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_nand_i32
-    { INDEX_op_nand_i32, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_nor_i32
-    { INDEX_op_nor_i32, { R, RI, RI } },
-#endif
-    { INDEX_op_or_i32, { R, RI, RI } },
-#if TCG_TARGET_HAS_orc_i32
-    { INDEX_op_orc_i32, { R, RI, RI } },
-#endif
-    { INDEX_op_xor_i32, { R, RI, RI } },
-    { INDEX_op_shl_i32, { R, RI, RI } },
-    { INDEX_op_shr_i32, { R, RI, RI } },
-    { INDEX_op_sar_i32, { R, RI, RI } },
-#if TCG_TARGET_HAS_rot_i32
-    { INDEX_op_rotl_i32, { R, RI, RI } },
-    { INDEX_op_rotr_i32, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_deposit_i32
-    { INDEX_op_deposit_i32, { R, "0", R } },
-#endif
-
-    { INDEX_op_brcond_i32, { R, RI } },
-
-    { INDEX_op_setcond_i32, { R, R, RI } },
-#if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_setcond_i64, { R, R, RI } },
-#endif /* TCG_TARGET_REG_BITS == 64 */
-
-#if TCG_TARGET_REG_BITS == 32
-    /* TODO: Support R, R, R, R, RI, RI? Will it be faster? */
-    { INDEX_op_add2_i32, { R, R, R, R, R, R } },
-    { INDEX_op_sub2_i32, { R, R, R, R, R, R } },
-    { INDEX_op_brcond2_i32, { R, R, RI, RI } },
-    { INDEX_op_mulu2_i32, { R, R, R, R } },
-    { INDEX_op_setcond2_i32, { R, R, R, RI, RI } },
-#endif
-
-#if TCG_TARGET_HAS_not_i32
-    { INDEX_op_not_i32, { R, R } },
-#endif
-#if TCG_TARGET_HAS_neg_i32
-    { INDEX_op_neg_i32, { R, R } },
-#endif
-
-#if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_ld8u_i64, { R, R } },
-    { INDEX_op_ld8s_i64, { R, R } },
-    { INDEX_op_ld16u_i64, { R, R } },
-    { INDEX_op_ld16s_i64, { R, R } },
-    { INDEX_op_ld32u_i64, { R, R } },
-    { INDEX_op_ld32s_i64, { R, R } },
-    { INDEX_op_ld_i64, { R, R } },
-
-    { INDEX_op_st8_i64, { R, R } },
-    { INDEX_op_st16_i64, { R, R } },
-    { INDEX_op_st32_i64, { R, R } },
-    { INDEX_op_st_i64, { R, R } },
-
-    { INDEX_op_add_i64, { R, RI, RI } },
-    { INDEX_op_sub_i64, { R, RI, RI } },
-    { INDEX_op_mul_i64, { R, RI, RI } },
-#if TCG_TARGET_HAS_div_i64
-    { INDEX_op_div_i64, { R, R, R } },
-    { INDEX_op_divu_i64, { R, R, R } },
-    { INDEX_op_rem_i64, { R, R, R } },
-    { INDEX_op_remu_i64, { R, R, R } },
-#elif TCG_TARGET_HAS_div2_i64
-    { INDEX_op_div2_i64, { R, R, "0", "1", R } },
-    { INDEX_op_divu2_i64, { R, R, "0", "1", R } },
-#endif
-    { INDEX_op_and_i64, { R, RI, RI } },
-#if TCG_TARGET_HAS_andc_i64
-    { INDEX_op_andc_i64, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_eqv_i64
-    { INDEX_op_eqv_i64, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_nand_i64
-    { INDEX_op_nand_i64, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_nor_i64
-    { INDEX_op_nor_i64, { R, RI, RI } },
-#endif
-    { INDEX_op_or_i64, { R, RI, RI } },
-#if TCG_TARGET_HAS_orc_i64
-    { INDEX_op_orc_i64, { R, RI, RI } },
-#endif
-    { INDEX_op_xor_i64, { R, RI, RI } },
-    { INDEX_op_shl_i64, { R, RI, RI } },
-    { INDEX_op_shr_i64, { R, RI, RI } },
-    { INDEX_op_sar_i64, { R, RI, RI } },
-#if TCG_TARGET_HAS_rot_i64
-    { INDEX_op_rotl_i64, { R, RI, RI } },
-    { INDEX_op_rotr_i64, { R, RI, RI } },
-#endif
-#if TCG_TARGET_HAS_deposit_i64
-    { INDEX_op_deposit_i64, { R, "0", R } },
-#endif
-    { INDEX_op_brcond_i64, { R, RI } },
-
-#if TCG_TARGET_HAS_ext8s_i64
-    { INDEX_op_ext8s_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext16s_i64
-    { INDEX_op_ext16s_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext32s_i64
-    { INDEX_op_ext32s_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext8u_i64
-    { INDEX_op_ext8u_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext16u_i64
-    { INDEX_op_ext16u_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext32u_i64
-    { INDEX_op_ext32u_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_bswap16_i64
-    { INDEX_op_bswap16_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_bswap32_i64
-    { INDEX_op_bswap32_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_bswap64_i64
-    { INDEX_op_bswap64_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_not_i64
-    { INDEX_op_not_i64, { R, R } },
-#endif
-#if TCG_TARGET_HAS_neg_i64
-    { INDEX_op_neg_i64, { R, R } },
-#endif
-#endif /* TCG_TARGET_REG_BITS == 64 */
-
-    { INDEX_op_qemu_ld8u, { R, L } },
-    { INDEX_op_qemu_ld8s, { R, L } },
-    { INDEX_op_qemu_ld16u, { R, L } },
-    { INDEX_op_qemu_ld16s, { R, L } },
-    { INDEX_op_qemu_ld32, { R, L } },
-#if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_qemu_ld32u, { R, L } },
-    { INDEX_op_qemu_ld32s, { R, L } },
-#endif
-    { INDEX_op_qemu_ld64, { R64, L } },
-
-    { INDEX_op_qemu_st8, { R, S } },
-    { INDEX_op_qemu_st16, { R, S } },
-    { INDEX_op_qemu_st32, { R, S } },
-    { INDEX_op_qemu_st64, { R64, S } },
-
-#if TCG_TARGET_HAS_ext8s_i32
-    { INDEX_op_ext8s_i32, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext16s_i32
-    { INDEX_op_ext16s_i32, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext8u_i32
-    { INDEX_op_ext8u_i32, { R, R } },
-#endif
-#if TCG_TARGET_HAS_ext16u_i32
-    { INDEX_op_ext16u_i32, { R, R } },
-#endif
-
-#if TCG_TARGET_HAS_bswap16_i32
-    { INDEX_op_bswap16_i32, { R, R } },
-#endif
-#if TCG_TARGET_HAS_bswap32_i32
-    { INDEX_op_bswap32_i32, { R, R } },
-#endif
-
-    { -1 },
-};
+#include "qemu/tci.h"
 
 static const int tcg_target_reg_alloc_order[] = {
-    TCG_REG_R0,
-    TCG_REG_R1,
-    TCG_REG_R2,
-    TCG_REG_R3,
-#if 0 /* used for TCG_REG_CALL_STACK */
-    TCG_REG_R4,
-#endif
-    TCG_REG_R5,
-    TCG_REG_R6,
-    TCG_REG_R7,
-#if TCG_TARGET_NB_REGS >= 16
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
-    TCG_REG_R15,
-#endif
+    TCG_REG_F, TCG_REG_E, TCG_REG_D, TCG_REG_C, TCG_REG_B, TCG_REG_A
 };
 
-#if MAX_OPC_PARAM_IARGS != 5
-# error Fix needed, number of supported input arguments changed!
-#endif
-
-static const int tcg_target_call_iarg_regs[] = {
-    TCG_REG_R0,
-    TCG_REG_R1,
-    TCG_REG_R2,
-    TCG_REG_R3,
-#if 0 /* used for TCG_REG_CALL_STACK */
-    TCG_REG_R4,
-#endif
-    TCG_REG_R5,
-#if TCG_TARGET_REG_BITS == 32
-    /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
-    TCG_REG_R6,
-    TCG_REG_R7,
-#if TCG_TARGET_NB_REGS >= 16
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-#else
-# error Too few input registers available
-#endif
-#endif
-};
+/* No call arguments via registers.  All will be stored on the "stack".  */
+static const int tcg_target_call_iarg_regs[] = { };
 
 static const int tcg_target_call_oarg_regs[] = {
-    TCG_REG_R0,
+    TCG_REG_A,
 #if TCG_TARGET_REG_BITS == 32
-    TCG_REG_R1
+    TCG_REG_B
 #endif
 };
 
 #ifndef NDEBUG
 static const char *const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-    "r00",
-    "r01",
-    "r02",
-    "r03",
-    "r04",
-    "r05",
-    "r06",
-    "r07",
-#if TCG_TARGET_NB_REGS >= 16
-    "r08",
-    "r09",
-    "r10",
-    "r11",
-    "r12",
-    "r13",
-    "r14",
-    "r15",
-#if TCG_TARGET_NB_REGS >= 32
-    "r16",
-    "r17",
-    "r18",
-    "r19",
-    "r20",
-    "r21",
-    "r22",
-    "r23",
-    "r24",
-    "r25",
-    "r26",
-    "r27",
-    "r28",
-    "r29",
-    "r30",
-    "r31"
-#endif
-#endif
+    "a", "b", "c", "d", "e", "f", "vp", "sp"
 };
-#endif
+#endif /* NDEBUG */
 
-static void patch_reloc(tcg_insn_unit *code_ptr, int type,
+static void patch_reloc(tcg_insn_unit *ptr, int type,
                         intptr_t value, intptr_t addend)
 {
-    /* tcg_out_reloc always uses the same type, addend. */
-    assert(type == sizeof(tcg_target_long));
-    assert(addend == 0);
-    assert(value != 0);
-    if (TCG_TARGET_REG_BITS == 32) {
-        tcg_patch32(code_ptr, value);
-    } else {
-        tcg_patch64(code_ptr, value);
-    }
+    intptr_t disp = (tcg_insn_unit *)value - (ptr + 1);
+
+    assert(disp >= MIN_Y && disp <= MAX_Y);
+    *ptr = deposit32(*ptr, POS_Y, LEN_Y, disp + BIAS_Y);
 }
 
+/* Constants we accept.  */
+#define TCG_CT_CONST_S32 0x100
+
 /* Parse target specific constraints. */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
     const char *ct_str = *pct_str;
     switch (ct_str[0]) {
     case 'r':
-    case 'L':                   /* qemu_ld constraint */
-    case 'S':                   /* qemu_st constraint */
         ct->ct |= TCG_CT_REG;
-        tcg_regset_set32(ct->u.regs, 0, BIT(TCG_TARGET_NB_REGS) - 1);
+        tcg_regset_set32(ct->u.regs, 0, 0xff);
+        break;
+    case 'e':
+        ct->ct |= TCG_CT_CONST_S32;
         break;
     default:
         return -1;
@@ -397,505 +77,662 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     return 0;
 }
 
-#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
-/* Show current bytecode. Used by tcg interpreter. */
-void tci_disas(uint8_t opc)
+/* Test if a constant matches the constraint. */
+static int tcg_target_const_match(tcg_target_long val, TCGType type,
+                                  const TCGArgConstraint *arg_ct)
 {
-    const TCGOpDef *def = &tcg_op_defs[opc];
-    fprintf(stderr, "TCG %s %u, %u, %u\n",
-            def->name, def->nb_oargs, def->nb_iargs, def->nb_cargs);
+    int ct = arg_ct->ct;
+    if (ct & TCG_CT_CONST) {
+        return 1;
+    }
+    if ((ct & TCG_CT_CONST_S32) && val == (int32_t)val) {
+        return 1;
+    }
+    return 0;
 }
-#endif
 
-/* Write value (native size). */
-static void tcg_out_i(TCGContext *s, tcg_target_ulong v)
+static void tcg_fmt_rwxy(TCGContext *s, TCIOp opc, TCGReg r, TCGReg w,
+                         tcg_target_long x, tcg_target_long y,
+                         bool xc, bool yc)
 {
-    if (TCG_TARGET_REG_BITS == 32) {
-        tcg_out32(s, v);
+    tcg_insn_unit *start = s->code_ptr;
+    tcg_insn_unit insn;
+
+    s->code_ptr += 1;
+
+    assert((unsigned)opc < TCI_NUM_OPC);
+    insn = opc << POS_OP;
+
+    assert((unsigned)r < 8);
+    insn |= r << POS_R;
+
+    assert((unsigned)w < 8);
+    insn |= w << POS_W;
+
+    if (xc) {
+        assert(x == (int32_t)x);
+        if (x >= MIN_X && x <= MAX_X) {
+            x += BIAS_X;
+        } else {
+            tcg_out32(s, x);
+            x = 8;
+        }
+    } else {
+        assert(x < 8);
+    }
+    insn |= x << POS_X;
+
+    if (yc) {
+        assert(y == (int32_t)y);
+        if (y >= MIN_Y && y <= MAX_Y) {
+            y += BIAS_Y;
+        } else {
+            tcg_out32(s, y);
+            y = 8;
+        }
     } else {
-        tcg_out64(s, v);
+        assert(y < 8);
     }
+    insn |= y << POS_Y;
+
+    *start = insn;
 }
 
-/* Write opcode. */
-static void tcg_out_op_t(TCGContext *s, TCGOpcode op)
+static inline void tcg_fmt_rxy(TCGContext *s, TCIOp opc, TCGReg r,
+                               TCGArg x, TCGArg y, bool xc, bool yc)
 {
-    tcg_out8(s, op);
-    tcg_out8(s, 0);
+    tcg_fmt_rwxy(s, opc, r, 0, x, y, xc, yc);
 }
 
-/* Write register. */
-static void tcg_out_r(TCGContext *s, TCGArg t0)
+static inline void tcg_fmt_r(TCGContext *s, TCIOp opc, TCGReg r)
 {
-    assert(t0 < TCG_TARGET_NB_REGS);
-    tcg_out8(s, t0);
+    tcg_fmt_rwxy(s, opc, r, 0, 0, 0, 1, 1);
 }
 
-/* Write register or constant (native size). */
-static void tcg_out_ri(TCGContext *s, int const_arg, TCGArg arg)
+static inline void tcg_fmt_xy(TCGContext *s, TCIOp opc, TCGArg x, TCGArg y,
+                              bool xc, bool yc)
 {
-    if (const_arg) {
-        assert(const_arg == 1);
-        tcg_out8(s, TCG_CONST);
-        tcg_out_i(s, arg);
-    } else {
-        tcg_out_r(s, arg);
-    }
+    tcg_fmt_rwxy(s, opc, 0, 0, x, y, xc, yc);
 }
 
-/* Write register or constant (32 bit). */
-static void tcg_out_ri32(TCGContext *s, int const_arg, TCGArg arg)
+static inline void tcg_fmt_x(TCGContext *s, TCIOp opc, TCGArg x, bool xc)
 {
-    if (const_arg) {
-        assert(const_arg == 1);
-        tcg_out8(s, TCG_CONST);
-        tcg_out32(s, arg);
-    } else {
-        tcg_out_r(s, arg);
+    tcg_fmt_rwxy(s, opc, 0, 0, x, 0, xc, 1);
+}
+
+static inline void tcg_fmt_ptr(TCGContext *s, TCIOp opc, uintptr_t p)
+{
+    /* Set R appropriate for calls and W appropriate for 32-bit call8.
+       The values are ignored in all other cases.  */
+    tcg_fmt_rwxy(s, opc, TCG_REG_A, TCG_REG_B,
+                 (int32_t)(p >> 31 >> 1), (int32_t)p, 1, 1);
+}
+
+static inline void tcg_out_mov(TCGContext *s, TCGType type,
+                               TCGReg ret, TCGReg val)
+{
+    if (ret != val) {
+        tcg_fmt_rxy(s, TCI_ior, ret, 0, val, 1, 0);
     }
 }
 
-#if TCG_TARGET_REG_BITS == 64
-/* Write register or constant (64 bit). */
-static void tcg_out_ri64(TCGContext *s, int const_arg, TCGArg arg)
+static void tcg_out_movi(TCGContext *s, TCGType type,
+                         TCGReg ret, tcg_target_long val)
 {
-    if (const_arg) {
-        assert(const_arg == 1);
-        tcg_out8(s, TCG_CONST);
-        tcg_out64(s, arg);
+    if (type == TCG_TYPE_I32 || val == (int32_t)val) {
+        tcg_fmt_rxy(s, TCI_ior, ret, 0, (int32_t)val, 1, 1);
     } else {
-        tcg_out_r(s, arg);
+        tcg_fmt_rxy(s, TCI_concat4, ret, val >> 31 >> 1, (int32_t)val, 1, 1);
     }
 }
-#endif
 
-/* Write label. */
-static void tci_out_label(TCGContext *s, TCGArg arg)
+static void tcg_out_br(TCGContext *s, TCIOp opc, int arg)
 {
     TCGLabel *label = &s->labels[arg];
+
     if (label->has_value) {
-        tcg_out_i(s, label->u.value);
-        assert(label->u.value);
+        intptr_t disp = label->u.value_ptr - (s->code_ptr + 1);
+        assert(disp >= MIN_Y && disp < MAX_Y);
+        tcg_fmt_rwxy(s, opc, 0, 0, 0, disp, 1, 1);
     } else {
-        tcg_out_reloc(s, s->code_ptr, sizeof(tcg_target_ulong), arg, 0);
-        s->code_ptr += sizeof(tcg_target_ulong);
+        /* Retain the current contents of Y during retranslation.  */
+        QEMU_BUILD_BUG_ON(POS_Y != 0);
+        tcg_out_reloc(s, s->code_ptr, 0, arg, 0);
+        tcg_out32(s, deposit32(opc << POS_OP, 0, LEN_Y, *s->code_ptr));
     }
 }
 
-static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
-                       intptr_t arg2)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
+                       TCGReg base, intptr_t ofs)
 {
-    uint8_t *old_code_ptr = s->code_ptr;
-    if (type == TCG_TYPE_I32) {
-        tcg_out_op_t(s, INDEX_op_ld_i32);
-        tcg_out_r(s, ret);
-        tcg_out_r(s, arg1);
-        tcg_out32(s, arg2);
-    } else {
-        assert(type == TCG_TYPE_I64);
-#if TCG_TARGET_REG_BITS == 64
-        tcg_out_op_t(s, INDEX_op_ld_i64);
-        tcg_out_r(s, ret);
-        tcg_out_r(s, arg1);
-        assert(arg2 == (int32_t)arg2);
-        tcg_out32(s, arg2);
-#else
-        TODO();
-#endif
-    }
-    old_code_ptr[1] = s->code_ptr - old_code_ptr;
+    tcg_fmt_rwxy(s, type == TCG_TYPE_I32 ? TCI_ld4s : TCI_ld8,
+                 ret, base, 0, ofs, 1, 1);
 }
 
-static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg val,
+                       TCGReg base, intptr_t ofs)
 {
-    uint8_t *old_code_ptr = s->code_ptr;
-    assert(ret != arg);
-#if TCG_TARGET_REG_BITS == 32
-    tcg_out_op_t(s, INDEX_op_mov_i32);
-#else
-    tcg_out_op_t(s, INDEX_op_mov_i64);
-#endif
-    tcg_out_r(s, ret);
-    tcg_out_r(s, arg);
-    old_code_ptr[1] = s->code_ptr - old_code_ptr;
+    tcg_fmt_rwxy(s, type == TCG_TYPE_I32 ? TCI_st4 : TCI_st8,
+                 0, base, val, ofs, 0, 1);
 }
 
-static void tcg_out_movi(TCGContext *s, TCGType type,
-                         TCGReg t0, tcg_target_long arg)
-{
-    uint8_t *old_code_ptr = s->code_ptr;
-    uint32_t arg32 = arg;
-    if (type == TCG_TYPE_I32 || arg == arg32) {
-        tcg_out_op_t(s, INDEX_op_movi_i32);
-        tcg_out_r(s, t0);
-        tcg_out32(s, arg32);
-    } else {
-        assert(type == TCG_TYPE_I64);
+static const TCIOp tci_cond4[16] = {
+    [TCG_COND_EQ] = TCI_cmp4eq,
+    [TCG_COND_NE] = TCI_cmp4ne,
+    [TCG_COND_LT] = TCI_cmp4lt,
+    [TCG_COND_LE] = TCI_cmp4le,
+    [TCG_COND_GT] = TCI_cmp4gt,
+    [TCG_COND_GE] = TCI_cmp4ge,
+    [TCG_COND_LTU] = TCI_cmp4ltu,
+    [TCG_COND_LEU] = TCI_cmp4leu,
+    [TCG_COND_GTU] = TCI_cmp4gtu,
+    [TCG_COND_GEU] = TCI_cmp4geu,
+};
+
+static const TCIOp tci_cond8[16] = {
+    [TCG_COND_EQ] = TCI_cmp8eq,
+    [TCG_COND_NE] = TCI_cmp8ne,
+    [TCG_COND_LT] = TCI_cmp8lt,
+    [TCG_COND_LE] = TCI_cmp8le,
+    [TCG_COND_GT] = TCI_cmp8gt,
+    [TCG_COND_GE] = TCI_cmp8ge,
+    [TCG_COND_LTU] = TCI_cmp8ltu,
+    [TCG_COND_LEU] = TCI_cmp8leu,
+    [TCG_COND_GTU] = TCI_cmp8gtu,
+    [TCG_COND_GEU] = TCI_cmp8geu,
+};
+
+static const TCIOp tci_condp[16] = {
+    [TCG_COND_EQ] = TCI_cmppeq,
+    [TCG_COND_NE] = TCI_cmppne,
+    [TCG_COND_LT] = TCI_cmpplt,
+    [TCG_COND_LE] = TCI_cmpple,
+    [TCG_COND_GT] = TCI_cmppgt,
+    [TCG_COND_GE] = TCI_cmppge,
+    [TCG_COND_LTU] = TCI_cmppltu,
+    [TCG_COND_LEU] = TCI_cmppleu,
+    [TCG_COND_GTU] = TCI_cmppgtu,
+    [TCG_COND_GEU] = TCI_cmppgeu,
+};
+
+static const TCIOp tci_qemu_ld[16] = {
+    [MO_UB]    = TCI_qld1u,
+    [MO_SB]    = TCI_qld1s,
+    [MO_LEUW]  = TCI_qld2u_le,
+    [MO_LESW]  = TCI_qld2s_le,
+    [MO_LEUL]  = TCI_qld4u_le,
+    [MO_LESL]  = TCI_qld4s_le,
+    [MO_LEQ]   = TCI_qld8_le,
+    [MO_BEUW]  = TCI_qld2u_be,
+    [MO_BESW]  = TCI_qld2s_be,
+    [MO_BEUL]  = TCI_qld4u_be,
+    [MO_BESL]  = TCI_qld4s_be,
+    [MO_BEQ]   = TCI_qld8_be,
+};
+
+static const TCIOp tci_qemu_st[16] = {
+    [MO_UB]    = TCI_qst1,
+    [MO_LEUW]  = TCI_qst2_le,
+    [MO_LEUL]  = TCI_qst4_le,
+    [MO_LEQ]   = TCI_qst8_le,
+    [MO_BEUW]  = TCI_qst2_be,
+    [MO_BEUL]  = TCI_qst4_be,
+    [MO_BEQ]   = TCI_qst8_be,
+};
+
+static const TCIOp tci_operation[NB_OPS] = {
+    [INDEX_op_ld8u_i32]     = TCI_ld1u,
+    [INDEX_op_ld8u_i64]     = TCI_ld1u,
+    [INDEX_op_ld8s_i32]     = TCI_ld1s,
+    [INDEX_op_ld8s_i64]     = TCI_ld1s,
+    [INDEX_op_ld16u_i32]    = TCI_ld2u,
+    [INDEX_op_ld16u_i64]    = TCI_ld2u,
+    [INDEX_op_ld16s_i32]    = TCI_ld2s,
+    [INDEX_op_ld16s_i64]    = TCI_ld2s,
+    [INDEX_op_ld32u_i64]    = TCI_ld4u,
+    [INDEX_op_ld32s_i64]    = TCI_ld4s,
+    [INDEX_op_ld_i32]       = TCI_ld4s,
+    [INDEX_op_ld_i64]       = TCI_ld8,
+
+    [INDEX_op_st8_i32]      = TCI_st1,
+    [INDEX_op_st8_i64]      = TCI_st1,
+    [INDEX_op_st16_i32]     = TCI_st2,
+    [INDEX_op_st16_i64]     = TCI_st2,
+    [INDEX_op_st32_i64]     = TCI_st4,
+    [INDEX_op_st_i32]       = TCI_st4,
+    [INDEX_op_st_i64]       = TCI_st8,
+
+    [INDEX_op_add_i32]      = TCI_add,
+    [INDEX_op_add_i64]      = TCI_add,
+    [INDEX_op_sub_i32]      = TCI_sub,
+    [INDEX_op_sub_i64]      = TCI_sub,
+    [INDEX_op_mul_i32]      = TCI_mul,
+    [INDEX_op_mul_i64]      = TCI_mul,
+    [INDEX_op_div_i32]      = TCI_divs,
+    [INDEX_op_div_i64]      = TCI_divs,
+    [INDEX_op_rem_i32]      = TCI_rems,
+    [INDEX_op_rem_i64]      = TCI_rems,
+    [INDEX_op_divu_i32]     = TCI_divu,
+    [INDEX_op_divu_i64]     = TCI_divu,
+    [INDEX_op_remu_i32]     = TCI_remu,
+    [INDEX_op_remu_i64]     = TCI_remu,
+    [INDEX_op_and_i32]      = TCI_and,
+    [INDEX_op_and_i64]      = TCI_and,
+    [INDEX_op_or_i32]       = TCI_ior,
+    [INDEX_op_or_i64]       = TCI_ior,
+    [INDEX_op_xor_i32]      = TCI_xor,
+    [INDEX_op_xor_i64]      = TCI_xor,
+    [INDEX_op_andc_i32]     = TCI_andc,
+    [INDEX_op_andc_i64]     = TCI_andc,
+    [INDEX_op_orc_i32]      = TCI_iorc,
+    [INDEX_op_orc_i64]      = TCI_iorc,
+    [INDEX_op_eqv_i32]      = TCI_xorc,
+    [INDEX_op_eqv_i64]      = TCI_xorc,
+    [INDEX_op_nand_i32]     = TCI_nand,
+    [INDEX_op_nand_i64]     = TCI_nand,
+    [INDEX_op_nor_i32]      = TCI_nior,
+    [INDEX_op_nor_i64]      = TCI_nior,
+    [INDEX_op_shl_i32]      = TCI_shl,
+    [INDEX_op_shl_i64]      = TCI_shl,
+    [INDEX_op_sar_i32]      = TCI_sar4,
+    [INDEX_op_sar_i64]      = TCI_sar8,
+    [INDEX_op_shr_i32]      = TCI_shr4,
+    [INDEX_op_shr_i64]      = TCI_shr8,
+    [INDEX_op_rotl_i32]     = TCI_rol4,
+    [INDEX_op_rotr_i32]     = TCI_ror4,
+    [INDEX_op_rotl_i64]     = TCI_rol8,
+    [INDEX_op_rotr_i64]     = TCI_ror8,
+
+    [INDEX_op_bswap16_i32]  = TCI_bswap2,
+    [INDEX_op_bswap16_i64]  = TCI_bswap2,
+    [INDEX_op_bswap32_i32]  = TCI_bswap4,
+    [INDEX_op_bswap32_i64]  = TCI_bswap4,
+    [INDEX_op_bswap64_i64]  = TCI_bswap8,
+    [INDEX_op_ext8s_i32]    = TCI_sxt1,
+    [INDEX_op_ext8s_i64]    = TCI_sxt1,
+    [INDEX_op_ext8u_i32]    = TCI_zxt1,
+    [INDEX_op_ext8u_i64]    = TCI_zxt1,
+    [INDEX_op_ext16s_i32]   = TCI_sxt2,
+    [INDEX_op_ext16s_i64]   = TCI_sxt2,
+    [INDEX_op_ext16u_i32]   = TCI_zxt2,
+    [INDEX_op_ext16u_i64]   = TCI_zxt2,
+    [INDEX_op_ext32s_i64]   = TCI_sxt4,
+    [INDEX_op_ext32u_i64]   = TCI_zxt4,
+
+    [INDEX_op_setcond_i32]  = TCI_setc,
+    [INDEX_op_setcond_i64]  = TCI_setc,
+    [INDEX_op_movcond_i32]  = TCI_movc,
+    [INDEX_op_movcond_i64]  = TCI_movc,
+
 #if TCG_TARGET_REG_BITS == 64
-        tcg_out_op_t(s, INDEX_op_movi_i64);
-        tcg_out_r(s, t0);
-        tcg_out64(s, arg);
+    [INDEX_op_add2_i64]     = TCI_add2,
+    [INDEX_op_sub2_i64]     = TCI_sub2,
+    [INDEX_op_mulu2_i64]    = TCI_mulu2,
+    [INDEX_op_muls2_i64]    = TCI_muls2,
 #else
-        TODO();
+    [INDEX_op_add2_i32]     = TCI_add2,
+    [INDEX_op_sub2_i32]     = TCI_sub2,
+    [INDEX_op_mulu2_i32]    = TCI_mulu2,
+    [INDEX_op_muls2_i32]    = TCI_muls2,
 #endif
-    }
-    old_code_ptr[1] = s->code_ptr - old_code_ptr;
-}
+};
 
-static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
 {
-    tcg_out_ri(s, 1, (uintptr_t)arg);
+    const TCGHelperInfo *info;
+    TCIOp op;
+
+    info = g_hash_table_lookup(s->helpers, (gpointer)arg);
+    if (info->cif->rtype == &ffi_type_void) {
+        op = TCI_call0;
+    } else if (info->cif->rtype->size == 4) {
+        op = TCI_call4;
+    } else {
+        assert(info->cif->rtype->size == 8);
+        op = TCI_call8;
+    }
+
+    tcg_fmt_ptr(s, op, (uintptr_t)info);
 }
 
-static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
+static void tcg_out_op(TCGContext *s, TCGOpcode tcg_opc, const TCGArg *args,
                        const int *const_args)
 {
-    uint8_t *old_code_ptr = s->code_ptr;
+    TCIOp tci_opc = tci_operation[tcg_opc];
+    TCGArg a0, a1, a2;
+    int c1, c2;
 
-    tcg_out_op_t(s, opc);
+    a0 = args[0];
+    a1 = args[1];
+    a2 = args[2];
+    c1 = const_args[1];
+    c2 = const_args[2];
 
-    switch (opc) {
+    switch (tcg_opc) {
     case INDEX_op_exit_tb:
-        tcg_out64(s, args[0]);
+        tcg_fmt_ptr(s, TCI_exit, a0);
         break;
+
     case INDEX_op_goto_tb:
-        if (s->tb_jmp_offset) {
-            /* Direct jump method. */
-            assert(args[0] < ARRAY_SIZE(s->tb_jmp_offset));
-            s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
-            tcg_out32(s, 0);
-        } else {
-            /* Indirect jump method. */
-            TODO();
-        }
-        assert(args[0] < ARRAY_SIZE(s->tb_next_offset));
-        s->tb_next_offset[args[0]] = tcg_current_code_size(s);
+        /* Direct jump method. */
+        assert(s->tb_jmp_offset != 0);
+        /* OPC + Y=imm4  */
+        tcg_out32(s, (TCI_b << POS_OP) + (8 << POS_Y));
+        s->tb_jmp_offset[a0] = tcg_current_code_size(s);
+        s->code_ptr += 1;
+        s->tb_next_offset[a0] = tcg_current_code_size(s);
         break;
+
     case INDEX_op_br:
-        tci_out_label(s, args[0]);
-        break;
-    case INDEX_op_setcond_i32:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_ri32(s, const_args[2], args[2]);
-        tcg_out8(s, args[3]);   /* condition */
+        tcg_out_br(s, TCI_b, a0);
         break;
-#if TCG_TARGET_REG_BITS == 32
-    case INDEX_op_setcond2_i32:
-        /* setcond2_i32 cond, t0, t1_low, t1_high, t2_low, t2_high */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        tcg_out_ri32(s, const_args[3], args[3]);
-        tcg_out_ri32(s, const_args[4], args[4]);
-        tcg_out8(s, args[5]);   /* condition */
-        break;
-#elif TCG_TARGET_REG_BITS == 64
-    case INDEX_op_setcond_i64:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_ri64(s, const_args[2], args[2]);
-        tcg_out8(s, args[3]);   /* condition */
-        break;
-#endif
-    case INDEX_op_ld8u_i32:
-    case INDEX_op_ld8s_i32:
-    case INDEX_op_ld16u_i32:
-    case INDEX_op_ld16s_i32:
-    case INDEX_op_ld_i32:
+
     case INDEX_op_st8_i32:
+    case INDEX_op_st8_i64:
     case INDEX_op_st16_i32:
+    case INDEX_op_st16_i64:
     case INDEX_op_st_i32:
+    case INDEX_op_st32_i64:
+        a0 = (int32_t)a0;
+        /* fall through */
+    case INDEX_op_st_i64:
+        tcg_fmt_rwxy(s, tci_opc, 0, a1, a0, (intptr_t)a2, const_args[0], 1);
+        break;
+    case INDEX_op_ld8u_i32:
     case INDEX_op_ld8u_i64:
+    case INDEX_op_ld8s_i32:
     case INDEX_op_ld8s_i64:
+    case INDEX_op_ld16u_i32:
     case INDEX_op_ld16u_i64:
+    case INDEX_op_ld16s_i32:
     case INDEX_op_ld16s_i64:
     case INDEX_op_ld32u_i64:
+    case INDEX_op_ld_i32:
     case INDEX_op_ld32s_i64:
     case INDEX_op_ld_i64:
-    case INDEX_op_st8_i64:
-    case INDEX_op_st16_i64:
-    case INDEX_op_st32_i64:
-    case INDEX_op_st_i64:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        assert(args[2] == (int32_t)args[2]);
-        tcg_out32(s, args[2]);
-        break;
-    case INDEX_op_add_i32:
-    case INDEX_op_sub_i32:
-    case INDEX_op_mul_i32:
-    case INDEX_op_and_i32:
-    case INDEX_op_andc_i32:     /* Optional (TCG_TARGET_HAS_andc_i32). */
-    case INDEX_op_eqv_i32:      /* Optional (TCG_TARGET_HAS_eqv_i32). */
-    case INDEX_op_nand_i32:     /* Optional (TCG_TARGET_HAS_nand_i32). */
-    case INDEX_op_nor_i32:      /* Optional (TCG_TARGET_HAS_nor_i32). */
-    case INDEX_op_or_i32:
-    case INDEX_op_orc_i32:      /* Optional (TCG_TARGET_HAS_orc_i32). */
-    case INDEX_op_xor_i32:
-    case INDEX_op_shl_i32:
-    case INDEX_op_shr_i32:
-    case INDEX_op_sar_i32:
-    case INDEX_op_rotl_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
-    case INDEX_op_rotr_i32:     /* Optional (TCG_TARGET_HAS_rot_i32). */
-        tcg_out_r(s, args[0]);
-        tcg_out_ri32(s, const_args[1], args[1]);
-        tcg_out_ri32(s, const_args[2], args[2]);
-        break;
-    case INDEX_op_deposit_i32:  /* Optional (TCG_TARGET_HAS_deposit_i32). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        assert(args[3] <= UINT8_MAX);
-        tcg_out8(s, args[3]);
-        assert(args[4] <= UINT8_MAX);
-        tcg_out8(s, args[4]);
+        tcg_fmt_rwxy(s, tci_opc, a0, a1, 0, (intptr_t)a2, 1, 1);
         break;
 
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_add_i64:
-    case INDEX_op_sub_i64:
-    case INDEX_op_mul_i64:
-    case INDEX_op_and_i64:
-    case INDEX_op_andc_i64:     /* Optional (TCG_TARGET_HAS_andc_i64). */
-    case INDEX_op_eqv_i64:      /* Optional (TCG_TARGET_HAS_eqv_i64). */
-    case INDEX_op_nand_i64:     /* Optional (TCG_TARGET_HAS_nand_i64). */
-    case INDEX_op_nor_i64:      /* Optional (TCG_TARGET_HAS_nor_i64). */
-    case INDEX_op_or_i64:
-    case INDEX_op_orc_i64:      /* Optional (TCG_TARGET_HAS_orc_i64). */
-    case INDEX_op_xor_i64:
-    case INDEX_op_shl_i64:
-    case INDEX_op_shr_i64:
-    case INDEX_op_sar_i64:
-    case INDEX_op_rotl_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
-    case INDEX_op_rotr_i64:     /* Optional (TCG_TARGET_HAS_rot_i64). */
-        tcg_out_r(s, args[0]);
-        tcg_out_ri64(s, const_args[1], args[1]);
-        tcg_out_ri64(s, const_args[2], args[2]);
+    case INDEX_op_deposit_i32:
+        a2 = (int32_t)a2;
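+        /* fall through */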
+    case INDEX_op_deposit_i64:
+    {
+        int pos = args[3], len = args[4];
+        if (pos == 32 && len == 32) {
+            tcg_fmt_rxy(s, TCI_concat4, a0, a2, a1, c2, 0);
+        } else {
+            int poslen = (pos << 6) | len;
+            tcg_fmt_rwxy(s, TCI_deposit, a0, a1, a2, poslen, c2, 1);
+        }
         break;
-    case INDEX_op_deposit_i64:  /* Optional (TCG_TARGET_HAS_deposit_i64). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        assert(args[3] <= UINT8_MAX);
-        tcg_out8(s, args[3]);
-        assert(args[4] <= UINT8_MAX);
-        tcg_out8(s, args[4]);
+    }
+
+    case INDEX_op_setcond_i32:
+        tci_opc = tci_cond4[args[3]];
+        a1 = (int32_t)a1;
+        a2 = (int32_t)a2;
+        goto do_setcond;
+    case INDEX_op_setcond_i64:
+        tci_opc = tci_cond8[args[3]];
+    do_setcond:
+        tcg_fmt_xy(s, tci_opc, a1, a2, c1, c2);
+        tcg_fmt_r(s, TCI_setc, a0);
         break;
-    case INDEX_op_div_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_divu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_rem_i64:      /* Optional (TCG_TARGET_HAS_div_i64). */
-    case INDEX_op_remu_i64:     /* Optional (TCG_TARGET_HAS_div_i64). */
-        TODO();
+
+    case INDEX_op_brcond_i32:
+        tci_opc = tci_cond4[a2];
+        a0 = (int32_t)a0;
+        a1 = (int32_t)a1;
+        goto do_brcond;
+    case INDEX_op_brcond_i64:
+        tci_opc = tci_cond8[a2];
+    do_brcond:
+        tcg_fmt_xy(s, tci_opc, a0, a1, const_args[0], c1);
+        tcg_out_br(s, TCI_bc, args[3]);
         break;
-    case INDEX_op_div2_i64:     /* Optional (TCG_TARGET_HAS_div2_i64). */
-    case INDEX_op_divu2_i64:    /* Optional (TCG_TARGET_HAS_div2_i64). */
-        TODO();
+
+    case INDEX_op_movcond_i32:
+        tci_opc = tci_cond4[args[5]];
+        tcg_fmt_xy(s, tci_opc, (int32_t)a1, (int32_t)a2, c1, c2);
+        tcg_fmt_rxy(s, TCI_movc, a0, (int32_t)args[3], (int32_t)args[4],
+                    const_args[3], const_args[4]);
         break;
-    case INDEX_op_brcond_i64:
-        tcg_out_r(s, args[0]);
-        tcg_out_ri64(s, const_args[1], args[1]);
-        tcg_out8(s, args[2]);           /* condition */
-        tci_out_label(s, args[3]);
+    case INDEX_op_movcond_i64:
+        tci_opc = tci_cond8[args[5]];
+        tcg_fmt_xy(s, tci_opc, a1, a2, c1, c2);
+        tcg_fmt_rxy(s, TCI_movc, a0, args[3], args[4],
+                    const_args[3], const_args[4]);
         break;
-    case INDEX_op_bswap16_i64:  /* Optional (TCG_TARGET_HAS_bswap16_i64). */
-    case INDEX_op_bswap32_i64:  /* Optional (TCG_TARGET_HAS_bswap32_i64). */
-    case INDEX_op_bswap64_i64:  /* Optional (TCG_TARGET_HAS_bswap64_i64). */
-    case INDEX_op_not_i64:      /* Optional (TCG_TARGET_HAS_not_i64). */
-    case INDEX_op_neg_i64:      /* Optional (TCG_TARGET_HAS_neg_i64). */
-    case INDEX_op_ext8s_i64:    /* Optional (TCG_TARGET_HAS_ext8s_i64). */
-    case INDEX_op_ext8u_i64:    /* Optional (TCG_TARGET_HAS_ext8u_i64). */
-    case INDEX_op_ext16s_i64:   /* Optional (TCG_TARGET_HAS_ext16s_i64). */
-    case INDEX_op_ext16u_i64:   /* Optional (TCG_TARGET_HAS_ext16u_i64). */
-    case INDEX_op_ext32s_i64:   /* Optional (TCG_TARGET_HAS_ext32s_i64). */
-    case INDEX_op_ext32u_i64:   /* Optional (TCG_TARGET_HAS_ext32u_i64). */
-#endif /* TCG_TARGET_REG_BITS == 64 */
-    case INDEX_op_neg_i32:      /* Optional (TCG_TARGET_HAS_neg_i32). */
-    case INDEX_op_not_i32:      /* Optional (TCG_TARGET_HAS_not_i32). */
-    case INDEX_op_ext8s_i32:    /* Optional (TCG_TARGET_HAS_ext8s_i32). */
-    case INDEX_op_ext16s_i32:   /* Optional (TCG_TARGET_HAS_ext16s_i32). */
-    case INDEX_op_ext8u_i32:    /* Optional (TCG_TARGET_HAS_ext8u_i32). */
-    case INDEX_op_ext16u_i32:   /* Optional (TCG_TARGET_HAS_ext16u_i32). */
-    case INDEX_op_bswap16_i32:  /* Optional (TCG_TARGET_HAS_bswap16_i32). */
-    case INDEX_op_bswap32_i32:  /* Optional (TCG_TARGET_HAS_bswap32_i32). */
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
+
+    case INDEX_op_qemu_ld_i64:
+        if (TCG_TARGET_REG_BITS == 32) {
+            tci_opc = tci_qemu_ld[args[3]];
+            tcg_fmt_rwxy(s, tci_opc, a0, a1, a2, args[4], c2, 1);
+            break;
+        }
+        /* fall through */
+    case INDEX_op_qemu_ld_i32:
+        tci_opc = tci_qemu_ld[a2];
+        tcg_fmt_rwxy(s, tci_opc, a0, 0, a1, args[3], c1, 1);
         break;
-    case INDEX_op_div_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_divu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_rem_i32:      /* Optional (TCG_TARGET_HAS_div_i32). */
-    case INDEX_op_remu_i32:     /* Optional (TCG_TARGET_HAS_div_i32). */
-        tcg_out_r(s, args[0]);
-        tcg_out_ri32(s, const_args[1], args[1]);
-        tcg_out_ri32(s, const_args[2], args[2]);
+
+    case INDEX_op_qemu_st_i64:
+        if (TCG_TARGET_REG_BITS == 32) {
+            tci_opc = tci_qemu_st[args[3]];
+            tcg_fmt_rwxy(s, tci_opc, a1, a2, a0, args[4], const_args[0], 1);
+        } else {
+            tci_opc = tci_qemu_st[a2];
+            tcg_fmt_rwxy(s, tci_opc, 0, a1, a0, args[3], const_args[0], 1);
+        }
         break;
-    case INDEX_op_div2_i32:     /* Optional (TCG_TARGET_HAS_div2_i32). */
-    case INDEX_op_divu2_i32:    /* Optional (TCG_TARGET_HAS_div2_i32). */
-        TODO();
+    case INDEX_op_qemu_st_i32:
+        tci_opc = tci_qemu_st[a2];
+        tcg_fmt_rwxy(s, tci_opc, 0, a1, (int32_t)a0, args[3],
+                     const_args[0], 1);
         break;
-#if TCG_TARGET_REG_BITS == 32
+
     case INDEX_op_add2_i32:
     case INDEX_op_sub2_i32:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        tcg_out_r(s, args[3]);
-        tcg_out_r(s, args[4]);
-        tcg_out_r(s, args[5]);
+        tcg_fmt_rwxy(s, tci_opc, a0, a1, (int32_t)args[5], (int32_t)args[4],
+                     const_args[5], const_args[4]);
         break;
-    case INDEX_op_brcond2_i32:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_ri32(s, const_args[2], args[2]);
-        tcg_out_ri32(s, const_args[3], args[3]);
-        tcg_out8(s, args[4]);           /* condition */
-        tci_out_label(s, args[5]);
+    case INDEX_op_add2_i64:
+    case INDEX_op_sub2_i64:
+        tcg_fmt_rwxy(s, tci_opc, a0, a1, args[5], args[4],
+                     const_args[5], const_args[4]);
         break;
+
     case INDEX_op_mulu2_i32:
-        tcg_out_r(s, args[0]);
-        tcg_out_r(s, args[1]);
-        tcg_out_r(s, args[2]);
-        tcg_out_r(s, args[3]);
-        break;
-#endif
-    case INDEX_op_brcond_i32:
-        tcg_out_r(s, args[0]);
-        tcg_out_ri32(s, const_args[1], args[1]);
-        tcg_out8(s, args[2]);           /* condition */
-        tci_out_label(s, args[3]);
+    case INDEX_op_muls2_i32:
+        tcg_fmt_rwxy(s, tci_opc, a0, a1, (int32_t)a2, (int32_t)args[3],
+                     c2, const_args[3]);
         break;
-    case INDEX_op_qemu_ld8u:
-    case INDEX_op_qemu_ld8s:
-    case INDEX_op_qemu_ld16u:
-    case INDEX_op_qemu_ld16s:
-    case INDEX_op_qemu_ld32:
-#if TCG_TARGET_REG_BITS == 64
-    case INDEX_op_qemu_ld32s:
-    case INDEX_op_qemu_ld32u:
-#endif
-        tcg_out_r(s, *args++);
-        tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        tcg_out_r(s, *args++);
-#endif
-#ifdef CONFIG_SOFTMMU
-        tcg_out_i(s, *args);
-#endif
+    case INDEX_op_mulu2_i64:
+    case INDEX_op_muls2_i64:
+        tcg_fmt_rwxy(s, tci_opc, a0, a1, a2, args[3], c2, const_args[3]);
         break;
-    case INDEX_op_qemu_ld64:
-        tcg_out_r(s, *args++);
-#if TCG_TARGET_REG_BITS == 32
-        tcg_out_r(s, *args++);
-#endif
-        tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        tcg_out_r(s, *args++);
-#endif
-#ifdef CONFIG_SOFTMMU
-        tcg_out_i(s, *args);
-#endif
+
+    case INDEX_op_setcond2_i32:
+        tci_opc = tci_condp[args[5]];
+        tcg_fmt_rwxy(s, tci_opc, a1, a2, (int32_t)args[4], (int32_t)args[3],
+                     const_args[4], const_args[3]);
+        tcg_fmt_r(s, TCI_setc, a0);
         break;
-    case INDEX_op_qemu_st8:
-    case INDEX_op_qemu_st16:
-    case INDEX_op_qemu_st32:
-        tcg_out_r(s, *args++);
-        tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        tcg_out_r(s, *args++);
-#endif
-#ifdef CONFIG_SOFTMMU
-        tcg_out_i(s, *args);
-#endif
+    case INDEX_op_brcond2_i32:
+        tci_opc = tci_condp[args[4]];
+        tcg_fmt_rwxy(s, tci_opc, a0, a1, args[3], a2, const_args[3], c2);
+        tcg_out_br(s, TCI_bc, args[5]);
         break;
-    case INDEX_op_qemu_st64:
-        tcg_out_r(s, *args++);
-#if TCG_TARGET_REG_BITS == 32
-        tcg_out_r(s, *args++);
-#endif
-        tcg_out_r(s, *args++);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        tcg_out_r(s, *args++);
-#endif
-#ifdef CONFIG_SOFTMMU
-        tcg_out_i(s, *args);
-#endif
+
+    default:
+        if ((tcg_op_defs[tcg_opc].flags & TCG_OPF_64BIT) == 0) {
+            a1 = (int32_t)a1;
+            a2 = (int32_t)a2;
+        }
+        assert(tci_opc != TCI_invalid);
+        if (tci_opc <= TCI_LAST_BINARY_OPC) {
+            tcg_fmt_rxy(s, tci_opc, a0, a1, a2, c1, c2);
+        } else {
+            assert(tci_opc <= TCI_LAST_UNARY_OPC);
+            tcg_fmt_rxy(s, tci_opc, a0, 0, a1, 1, c1);
+        }
         break;
+
     case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
     case INDEX_op_mov_i64:
     case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi.  */
     case INDEX_op_movi_i64:
     case INDEX_op_call:     /* Always emitted via tcg_out_call.  */
-    default:
         tcg_abort();
     }
-    old_code_ptr[1] = s->code_ptr - old_code_ptr;
 }
 
-static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
-                       intptr_t arg2)
+/* Generate global QEMU prologue and epilogue code. */
+static inline void tcg_target_qemu_prologue(TCGContext *s)
 {
-    uint8_t *old_code_ptr = s->code_ptr;
-    if (type == TCG_TYPE_I32) {
-        tcg_out_op_t(s, INDEX_op_st_i32);
-        tcg_out_r(s, arg);
-        tcg_out_r(s, arg1);
-        tcg_out32(s, arg2);
-    } else {
-        assert(type == TCG_TYPE_I64);
+}
+
+static const TCGTargetOpDef tcg_target_op_defs[] = {
+    { INDEX_op_exit_tb,     { } },
+    { INDEX_op_goto_tb,     { } },
+    { INDEX_op_br,          { } },
+
+    { INDEX_op_ld8u_i32,    { "r", "r" } },
+    { INDEX_op_ld8s_i32,    { "r", "r" } },
+    { INDEX_op_ld16u_i32,   { "r", "r" } },
+    { INDEX_op_ld16s_i32,   { "r", "r" } },
+    { INDEX_op_ld_i32,      { "r", "r" } },
+
+    { INDEX_op_st8_i32,     { "ri", "r" } },
+    { INDEX_op_st16_i32,    { "ri", "r" } },
+    { INDEX_op_st_i32,      { "ri", "r" } },
+
+    { INDEX_op_add_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_sub_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_mul_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_div_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_rem_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_divu_i32,    { "r", "ri", "ri" } },
+    { INDEX_op_remu_i32,    { "r", "ri", "ri" } },
+    { INDEX_op_and_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_or_i32,      { "r", "ri", "ri" } },
+    { INDEX_op_xor_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_andc_i32,    { "r", "ri", "ri" } },
+    { INDEX_op_orc_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_eqv_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_nand_i32,    { "r", "ri", "ri" } },
+    { INDEX_op_nor_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_shl_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_shr_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_sar_i32,     { "r", "ri", "ri" } },
+    { INDEX_op_rotl_i32,    { "r", "ri", "ri" } },
+    { INDEX_op_rotr_i32,    { "r", "ri", "ri" } },
+
+    { INDEX_op_ext8s_i32,   { "r", "r" } },
+    { INDEX_op_ext8u_i32,   { "r", "r" } },
+    { INDEX_op_ext16s_i32,  { "r", "r" } },
+    { INDEX_op_ext16u_i32,  { "r", "r" } },
+    { INDEX_op_bswap16_i32, { "r", "r" } },
+    { INDEX_op_bswap32_i32, { "r", "r" } },
+
+    { INDEX_op_brcond_i32,  { "ri", "ri" } },
+    { INDEX_op_setcond_i32, { "r", "ri", "ri" } },
+    { INDEX_op_movcond_i32, { "r", "ri", "ri", "ri", "ri" } },
+
+    { INDEX_op_deposit_i32, { "r", "r", "ri" } },
+
+    { INDEX_op_qemu_ld_i32, { "r", "r" } },
+    { INDEX_op_qemu_st_i32, { "ri", "r" } },
+
 #if TCG_TARGET_REG_BITS == 64
-        tcg_out_op_t(s, INDEX_op_st_i64);
-        tcg_out_r(s, arg);
-        tcg_out_r(s, arg1);
-        tcg_out32(s, arg2);
+    { INDEX_op_ld8u_i64,    { "r", "r" } },
+    { INDEX_op_ld8s_i64,    { "r", "r" } },
+    { INDEX_op_ld16u_i64,   { "r", "r" } },
+    { INDEX_op_ld16s_i64,   { "r", "r" } },
+    { INDEX_op_ld32u_i64,   { "r", "r" } },
+    { INDEX_op_ld32s_i64,   { "r", "r" } },
+    { INDEX_op_ld_i64,      { "r", "r" } },
+
+    { INDEX_op_st8_i64,     { "re", "r" } },
+    { INDEX_op_st16_i64,    { "re", "r" } },
+    { INDEX_op_st32_i64,    { "re", "r" } },
+    { INDEX_op_st_i64,      { "re", "r" } },
+
+    { INDEX_op_add_i64,     { "r", "re", "re" } },
+    { INDEX_op_sub_i64,     { "r", "re", "re" } },
+    { INDEX_op_mul_i64,     { "r", "re", "re" } },
+    { INDEX_op_div_i64,     { "r", "re", "re" } },
+    { INDEX_op_rem_i64,     { "r", "re", "re" } },
+    { INDEX_op_divu_i64,    { "r", "re", "re" } },
+    { INDEX_op_remu_i64,    { "r", "re", "re" } },
+    { INDEX_op_and_i64,     { "r", "re", "re" } },
+    { INDEX_op_or_i64,      { "r", "re", "re" } },
+    { INDEX_op_xor_i64,     { "r", "re", "re" } },
+    { INDEX_op_andc_i64,    { "r", "re", "re" } },
+    { INDEX_op_orc_i64,     { "r", "re", "re" } },
+    { INDEX_op_eqv_i64,     { "r", "re", "re" } },
+    { INDEX_op_nand_i64,    { "r", "re", "re" } },
+    { INDEX_op_nor_i64,     { "r", "re", "re" } },
+    { INDEX_op_shl_i64,     { "r", "re", "re" } },
+    { INDEX_op_shr_i64,     { "r", "re", "re" } },
+    { INDEX_op_sar_i64,     { "r", "re", "re" } },
+    { INDEX_op_rotl_i64,    { "r", "re", "re" } },
+    { INDEX_op_rotr_i64,    { "r", "re", "re" } },
+
+    { INDEX_op_ext8s_i64,   { "r", "r" } },
+    { INDEX_op_ext8u_i64,   { "r", "r" } },
+    { INDEX_op_ext16s_i64,  { "r", "r" } },
+    { INDEX_op_ext16u_i64,  { "r", "r" } },
+    { INDEX_op_ext32s_i64,  { "r", "r" } },
+    { INDEX_op_ext32u_i64,  { "r", "r" } },
+    { INDEX_op_bswap16_i64, { "r", "r" } },
+    { INDEX_op_bswap32_i64, { "r", "r" } },
+    { INDEX_op_bswap64_i64, { "r", "r" } },
+
+    { INDEX_op_brcond_i64,  { "re", "re" } },
+    { INDEX_op_setcond_i64, { "r", "re", "re" } },
+    { INDEX_op_movcond_i64, { "r", "re", "re", "re", "re" } },
+
+    { INDEX_op_deposit_i64, { "r", "r", "re" } },
+
+    { INDEX_op_add2_i64,    { "r", "r", "0", "1", "re", "re" } },
+    { INDEX_op_sub2_i64,    { "r", "r", "0", "1", "re", "re" } },
+    { INDEX_op_mulu2_i64,   { "r", "r", "re", "re" } },
+    { INDEX_op_muls2_i64,   { "r", "r", "re", "re" } },
+
+    { INDEX_op_qemu_ld_i64, { "r", "re" } },
+    { INDEX_op_qemu_st_i64, { "re", "r" } },
 #else
-        TODO();
-#endif
-    }
-    old_code_ptr[1] = s->code_ptr - old_code_ptr;
-}
+    { INDEX_op_add2_i32,    { "r", "r", "0", "1", "re", "re" } },
+    { INDEX_op_sub2_i32,    { "r", "r", "0", "1", "re", "re" } },
+    { INDEX_op_mulu2_i32,   { "r", "r", "re", "re" } },
+    { INDEX_op_muls2_i32,   { "r", "r", "re", "re" } },
+    { INDEX_op_brcond2_i32, { "r", "r", "re", "re" } },
+    { INDEX_op_setcond2_i32, { "r", "r", "r", "re", "re" } },
 
-/* Test if a constant matches the constraint. */
-static int tcg_target_const_match(tcg_target_long val, TCGType type,
-                                  const TCGArgConstraint *arg_ct)
-{
-    /* No need to return 0 or 1, 0 or != 0 is good enough. */
-    return arg_ct->ct & TCG_CT_CONST;
-}
+    { INDEX_op_qemu_ld_i64, { "r", "r", "re" } },
+    { INDEX_op_qemu_st_i64, { "re", "r", "r" } },
+#endif
+    { -1 },
+};
 
 static void tcg_target_init(TCGContext *s)
 {
-#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
-    const char *envval = getenv("DEBUG_TCG");
-    if (envval) {
-        qemu_set_log(strtol(envval, NULL, 0));
+    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xff);
+    if (TCG_TARGET_REG_BITS == 64) {
+        tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xff);
     }
-#endif
-
-    /* The current code uses uint8_t for tcg operations. */
-    assert(ARRAY_SIZE(tcg_op_defs) <= UINT8_MAX);
 
-    /* Registers available for 32 bit operations. */
-    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0,
-                     BIT(TCG_TARGET_NB_REGS) - 1);
-    /* Registers available for 64 bit operations. */
-    tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0,
-                     BIT(TCG_TARGET_NB_REGS) - 1);
-    /* TODO: Which registers should be set here? */
-    tcg_regset_set32(tcg_target_call_clobber_regs, 0,
-                     BIT(TCG_TARGET_NB_REGS) - 1);
+    tcg_regset_clear(tcg_target_call_clobber_regs);
+    tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_A);
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_B);
+    }
 
     tcg_regset_clear(s->reserved_regs);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
-    tcg_add_target_add_op_defs(tcg_target_op_defs);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
 
-    /* We use negative offsets from "sp" so that we can distinguish
-       stores that might pretend to be call arguments.  */
-    tcg_set_frame(s, TCG_REG_CALL_STACK,
-                  -CPU_TEMP_BUF_NLONGS * sizeof(long),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
-}
+    tcg_add_target_add_op_defs(tcg_target_op_defs);
 
-/* Generate global QEMU prologue and epilogue code. */
-static inline void tcg_target_qemu_prologue(TCGContext *s)
-{
+    tcg_set_frame(s, TCG_REG_SP, 0, TCG_STATIC_CALL_ARGS_SIZE);
 }
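
As an aside, the (pos << 6) | len packing used for TCI_deposit above is
easy to sanity-check in isolation.  A minimal standalone sketch (plain C,
mirroring the shift/mask convention used by tcg_out_op and the
interpreter; not part of the patch):

    #include <assert.h>

    static int pack_poslen(int pos, int len)
    {
        return (pos << 6) | len;    /* six bits of length, pos above */
    }

    int main(void)
    {
        int poslen = pack_poslen(8, 16);
        assert(poslen >> 6 == 8);       /* position decodes back */
        assert((poslen & 0x3f) == 16);  /* length decodes back */
        return 0;
    }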
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 0be5acd..6cd66ba 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -37,145 +37,109 @@
  * Therefore, we need both 32 and 64 bit virtual machines (interpreter).
  */
 
-#if !defined(TCG_TARGET_H)
+#ifndef TCG_TARGET_H
 #define TCG_TARGET_H
 
 #include "config-host.h"
 
 #define TCG_TARGET_INTERPRETER 1
-#define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_INSN_UNIT_SIZE 4
 
-#if UINTPTR_MAX == UINT32_MAX
-# define TCG_TARGET_REG_BITS 32
+/* We want to produce the fewest opcodes.  That means if we have a
+   64-bit target, we want to produce a 64-bit interpreter.  */
+#if TARGET_LONG_BITS == 64
+# define TCG_TARGET_REG_BITS 64
 #elif UINTPTR_MAX == UINT64_MAX
 # define TCG_TARGET_REG_BITS 64
 #else
-# error Unknown pointer size for tci target
-#endif
-
-#ifdef CONFIG_DEBUG_TCG
-/* Enable debug output. */
-#define CONFIG_DEBUG_TCG_INTERPRETER
+# define TCG_TARGET_REG_BITS 32
 #endif
 
 /* Optional instructions. */
 
-#define TCG_TARGET_HAS_bswap16_i32      1
-#define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_div_i32          1
 #define TCG_TARGET_HAS_rem_i32          1
 #define TCG_TARGET_HAS_ext8s_i32        1
-#define TCG_TARGET_HAS_ext16s_i32       1
 #define TCG_TARGET_HAS_ext8u_i32        1
+#define TCG_TARGET_HAS_ext16s_i32       1
 #define TCG_TARGET_HAS_ext16u_i32       1
-#define TCG_TARGET_HAS_andc_i32         0
+#define TCG_TARGET_HAS_bswap16_i32      1
+#define TCG_TARGET_HAS_bswap32_i32      1
 #define TCG_TARGET_HAS_deposit_i32      1
-#define TCG_TARGET_HAS_eqv_i32          0
-#define TCG_TARGET_HAS_nand_i32         0
-#define TCG_TARGET_HAS_nor_i32          0
-#define TCG_TARGET_HAS_neg_i32          1
-#define TCG_TARGET_HAS_not_i32          1
-#define TCG_TARGET_HAS_orc_i32          0
+#define TCG_TARGET_HAS_andc_i32         1
+#define TCG_TARGET_HAS_orc_i32          1
+#define TCG_TARGET_HAS_eqv_i32          1
+#define TCG_TARGET_HAS_nand_i32         1
+#define TCG_TARGET_HAS_nor_i32          1
+#define TCG_TARGET_HAS_neg_i32          0
+#define TCG_TARGET_HAS_not_i32          0
 #define TCG_TARGET_HAS_rot_i32          1
-#define TCG_TARGET_HAS_movcond_i32      0
-#define TCG_TARGET_HAS_muls2_i32        0
-#define TCG_TARGET_HAS_muluh_i32        0
-#define TCG_TARGET_HAS_mulsh_i32        0
+#define TCG_TARGET_HAS_movcond_i32      1
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_trunc_shr_i32    0
-#define TCG_TARGET_HAS_bswap16_i64      1
-#define TCG_TARGET_HAS_bswap32_i64      1
-#define TCG_TARGET_HAS_bswap64_i64      1
-#define TCG_TARGET_HAS_deposit_i64      1
-#define TCG_TARGET_HAS_div_i64          0
-#define TCG_TARGET_HAS_rem_i64          0
+#define TCG_TARGET_HAS_div_i64          1
+#define TCG_TARGET_HAS_rem_i64          1
 #define TCG_TARGET_HAS_ext8s_i64        1
-#define TCG_TARGET_HAS_ext16s_i64       1
-#define TCG_TARGET_HAS_ext32s_i64       1
 #define TCG_TARGET_HAS_ext8u_i64        1
+#define TCG_TARGET_HAS_ext16s_i64       1
 #define TCG_TARGET_HAS_ext16u_i64       1
+#define TCG_TARGET_HAS_ext32s_i64       1
 #define TCG_TARGET_HAS_ext32u_i64       1
-#define TCG_TARGET_HAS_andc_i64         0
-#define TCG_TARGET_HAS_eqv_i64          0
-#define TCG_TARGET_HAS_nand_i64         0
-#define TCG_TARGET_HAS_nor_i64          0
-#define TCG_TARGET_HAS_neg_i64          1
-#define TCG_TARGET_HAS_not_i64          1
-#define TCG_TARGET_HAS_orc_i64          0
+#define TCG_TARGET_HAS_bswap16_i64      1
+#define TCG_TARGET_HAS_bswap32_i64      1
+#define TCG_TARGET_HAS_bswap64_i64      1
+#define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_andc_i64         1
+#define TCG_TARGET_HAS_eqv_i64          1
+#define TCG_TARGET_HAS_nand_i64         1
+#define TCG_TARGET_HAS_nor_i64          1
+#define TCG_TARGET_HAS_neg_i64          0
+#define TCG_TARGET_HAS_not_i64          0
+#define TCG_TARGET_HAS_orc_i64          1
 #define TCG_TARGET_HAS_rot_i64          1
-#define TCG_TARGET_HAS_movcond_i64      0
-#define TCG_TARGET_HAS_muls2_i64        0
+#define TCG_TARGET_HAS_movcond_i64      1
 #define TCG_TARGET_HAS_add2_i32         0
 #define TCG_TARGET_HAS_sub2_i32         0
 #define TCG_TARGET_HAS_mulu2_i32        0
-#define TCG_TARGET_HAS_add2_i64         0
-#define TCG_TARGET_HAS_sub2_i64         0
-#define TCG_TARGET_HAS_mulu2_i64        0
+#define TCG_TARGET_HAS_muls2_i32        0
+#define TCG_TARGET_HAS_muluh_i32        0
+#define TCG_TARGET_HAS_mulsh_i32        0
+#define TCG_TARGET_HAS_add2_i64         1
+#define TCG_TARGET_HAS_sub2_i64         1
+#define TCG_TARGET_HAS_mulu2_i64        1
+#define TCG_TARGET_HAS_muls2_i64        1
 #define TCG_TARGET_HAS_muluh_i64        0
 #define TCG_TARGET_HAS_mulsh_i64        0
 #else
-#define TCG_TARGET_HAS_mulu2_i32        1
+#define TCG_TARGET_HAS_muls2_i32        1
+#define TCG_TARGET_HAS_muluh_i32        0
+#define TCG_TARGET_HAS_mulsh_i32        0
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-#define TCG_TARGET_HAS_new_ldst         0
+#define TCG_TARGET_HAS_new_ldst         1
 
-/* Number of registers available.
-   For 32 bit hosts, we need more than 8 registers (call arguments). */
-/* #define TCG_TARGET_NB_REGS 8 */
-#define TCG_TARGET_NB_REGS 16
-/* #define TCG_TARGET_NB_REGS 32 */
+#define TCG_TARGET_NB_REGS  8
 
-/* List of registers which are used by TCG. */
 typedef enum {
-    TCG_REG_R0 = 0,
-    TCG_REG_R1,
-    TCG_REG_R2,
-    TCG_REG_R3,
-    TCG_REG_R4,
-    TCG_REG_R5,
-    TCG_REG_R6,
-    TCG_REG_R7,
-#if TCG_TARGET_NB_REGS >= 16
-    TCG_REG_R8,
-    TCG_REG_R9,
-    TCG_REG_R10,
-    TCG_REG_R11,
-    TCG_REG_R12,
-    TCG_REG_R13,
-    TCG_REG_R14,
-    TCG_REG_R15,
-#if TCG_TARGET_NB_REGS >= 32
-    TCG_REG_R16,
-    TCG_REG_R17,
-    TCG_REG_R18,
-    TCG_REG_R19,
-    TCG_REG_R20,
-    TCG_REG_R21,
-    TCG_REG_R22,
-    TCG_REG_R23,
-    TCG_REG_R24,
-    TCG_REG_R25,
-    TCG_REG_R26,
-    TCG_REG_R27,
-    TCG_REG_R28,
-    TCG_REG_R29,
-    TCG_REG_R30,
-    TCG_REG_R31,
-#endif
-#endif
-    /* Special value UINT8_MAX is used by TCI to encode constant values. */
-    TCG_CONST = UINT8_MAX
+    TCG_REG_A,
+    TCG_REG_B,
+    TCG_REG_C,
+    TCG_REG_D,
+    TCG_REG_E,
+    TCG_REG_F,
+
+    TCG_REG_VP,
+    TCG_REG_SP,
+
+    TCG_AREG0 = TCG_REG_VP,
+    TCG_REG_CALL_STACK = TCG_REG_SP,
 } TCGReg;
 
-#define TCG_AREG0                       (TCG_TARGET_NB_REGS - 2)
-
 /* Used for function call generation. */
-#define TCG_REG_CALL_STACK              (TCG_TARGET_NB_REGS - 1)
 #define TCG_TARGET_CALL_STACK_OFFSET    0
-#define TCG_TARGET_STACK_ALIGN          16
-
-void tci_disas(uint8_t opc);
+#define TCG_TARGET_STACK_ALIGN          8
+#define TCG_TARGET_CALL_ALIGN_ARGS      1
 
 uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
 #define tcg_qemu_tb_exec tcg_qemu_tb_exec
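
For readers following the interpreter below: the X and Y fields of each
insn word decode as a register number, a trailing immediate word, or a
small biased constant.  A sketch of the X decode (BIAS_X and the field
layout live in include/qemu/tci.h, which is not part of this hunk):

    /* Values 0-7 name a register; 8 pulls a sign-extended 32-bit
       immediate from the next insn word; larger values are small
       constants biased by BIAS_X.  */
    static tcg_target_ulong decode_x(unsigned xi, const int32_t **pc,
                                     const tcg_target_ulong *regs)
    {
        if (xi < 8) {
            return regs[xi];
        } else if (xi == 8) {
            return *(*pc)++;
        }
        return xi - BIAS_X;
    }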
diff --git a/tci.c b/tci.c
index 6523ab8..8c0fee4 100644
--- a/tci.c
+++ b/tci.c
@@ -26,1212 +26,614 @@
 
 #include "qemu-common.h"
 #include "exec/exec-all.h"           /* MAX_OPC_PARAM_IARGS */
-#include "tcg-op.h"
+#include "tcg.h"
+#include "qemu/tci.h"
+#include <ffi.h>
 
-/* Marker for missing code. */
-#define TODO() \
-    do { \
-        fprintf(stderr, "TODO %s:%u: %s()\n", \
-                __FILE__, __LINE__, __func__); \
-        tcg_abort(); \
-    } while (0)
 
-#if MAX_OPC_PARAM_IARGS != 5
-# error Fix needed, number of supported input arguments changed!
-#endif
-#if TCG_TARGET_REG_BITS == 32
-typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong, tcg_target_ulong);
-#else
-typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong);
-#endif
-
-/* Targets which don't use GETPC also don't need tci_tb_ptr
-   which makes them a little faster. */
-#if defined(GETPC)
 uintptr_t tci_tb_ptr;
-#endif
-
-static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS];
-
-static tcg_target_ulong tci_read_reg(TCGReg index)
-{
-    assert(index < ARRAY_SIZE(tci_reg));
-    return tci_reg[index];
-}
-
-#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
-static int8_t tci_read_reg8s(TCGReg index)
-{
-    return (int8_t)tci_read_reg(index);
-}
-#endif
-
-#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
-static int16_t tci_read_reg16s(TCGReg index)
-{
-    return (int16_t)tci_read_reg(index);
-}
-#endif
-
-#if TCG_TARGET_REG_BITS == 64
-static int32_t tci_read_reg32s(TCGReg index)
-{
-    return (int32_t)tci_read_reg(index);
-}
-#endif
-
-static uint8_t tci_read_reg8(TCGReg index)
-{
-    return (uint8_t)tci_read_reg(index);
-}
-
-static uint16_t tci_read_reg16(TCGReg index)
-{
-    return (uint16_t)tci_read_reg(index);
-}
-
-static uint32_t tci_read_reg32(TCGReg index)
-{
-    return (uint32_t)tci_read_reg(index);
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static uint64_t tci_read_reg64(TCGReg index)
-{
-    return tci_read_reg(index);
-}
-#endif
-
-static void tci_write_reg(TCGReg index, tcg_target_ulong value)
-{
-    assert(index < ARRAY_SIZE(tci_reg));
-    assert(index != TCG_AREG0);
-    assert(index != TCG_REG_CALL_STACK);
-    tci_reg[index] = value;
-}
-
-static void tci_write_reg8s(TCGReg index, int8_t value)
-{
-    tci_write_reg(index, value);
-}
-
-static void tci_write_reg16s(TCGReg index, int16_t value)
-{
-    tci_write_reg(index, value);
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static void tci_write_reg32s(TCGReg index, int32_t value)
-{
-    tci_write_reg(index, value);
-}
-#endif
-
-static void tci_write_reg8(TCGReg index, uint8_t value)
-{
-    tci_write_reg(index, value);
-}
-
-static void tci_write_reg16(TCGReg index, uint16_t value)
-{
-    tci_write_reg(index, value);
-}
-
-static void tci_write_reg32(TCGReg index, uint32_t value)
-{
-    tci_write_reg(index, value);
-}
-
-#if TCG_TARGET_REG_BITS == 32
-static void tci_write_reg64(uint32_t high_index, uint32_t low_index,
-                            uint64_t value)
-{
-    tci_write_reg(low_index, value);
-    tci_write_reg(high_index, value >> 32);
-}
-#elif TCG_TARGET_REG_BITS == 64
-static void tci_write_reg64(TCGReg index, uint64_t value)
-{
-    tci_write_reg(index, value);
-}
-#endif
-
-#if TCG_TARGET_REG_BITS == 32
-/* Create a 64 bit value from two 32 bit values. */
-static uint64_t tci_uint64(uint32_t high, uint32_t low)
-{
-    return ((uint64_t)high << 32) + low;
-}
-#endif
-
-/* Read constant (native size) from bytecode. */
-static tcg_target_ulong tci_read_i(uint8_t **tb_ptr)
-{
-    tcg_target_ulong value = *(tcg_target_ulong *)(*tb_ptr);
-    *tb_ptr += sizeof(value);
-    return value;
-}
-
-/* Read unsigned constant (32 bit) from bytecode. */
-static uint32_t tci_read_i32(uint8_t **tb_ptr)
-{
-    uint32_t value = *(uint32_t *)(*tb_ptr);
-    *tb_ptr += sizeof(value);
-    return value;
-}
-
-/* Read signed constant (32 bit) from bytecode. */
-static int32_t tci_read_s32(uint8_t **tb_ptr)
-{
-    int32_t value = *(int32_t *)(*tb_ptr);
-    *tb_ptr += sizeof(value);
-    return value;
-}
-
-#if TCG_TARGET_REG_BITS == 64
-/* Read constant (64 bit) from bytecode. */
-static uint64_t tci_read_i64(uint8_t **tb_ptr)
-{
-    uint64_t value = *(uint64_t *)(*tb_ptr);
-    *tb_ptr += sizeof(value);
-    return value;
-}
-#endif
-
-/* Read indexed register (native size) from bytecode. */
-static tcg_target_ulong tci_read_r(uint8_t **tb_ptr)
-{
-    tcg_target_ulong value = tci_read_reg(**tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
-/* Read indexed register (8 bit) from bytecode. */
-static uint8_t tci_read_r8(uint8_t **tb_ptr)
-{
-    uint8_t value = tci_read_reg8(**tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
-#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
-/* Read indexed register (8 bit signed) from bytecode. */
-static int8_t tci_read_r8s(uint8_t **tb_ptr)
-{
-    int8_t value = tci_read_reg8s(**tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-#endif
-
-/* Read indexed register (16 bit) from bytecode. */
-static uint16_t tci_read_r16(uint8_t **tb_ptr)
-{
-    uint16_t value = tci_read_reg16(**tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
 
-#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
-/* Read indexed register (16 bit signed) from bytecode. */
-static int16_t tci_read_r16s(uint8_t **tb_ptr)
-{
-    int16_t value = tci_read_reg16s(**tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-#endif
-
-/* Read indexed register (32 bit) from bytecode. */
-static uint32_t tci_read_r32(uint8_t **tb_ptr)
-{
-    uint32_t value = tci_read_reg32(**tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
-
-#if TCG_TARGET_REG_BITS == 32
-/* Read two indexed registers (2 * 32 bit) from bytecode. */
-static uint64_t tci_read_r64(uint8_t **tb_ptr)
-{
-    uint32_t low = tci_read_r32(tb_ptr);
-    return tci_uint64(tci_read_r32(tb_ptr), low);
-}
-#elif TCG_TARGET_REG_BITS == 64
-/* Read indexed register (32 bit signed) from bytecode. */
-static int32_t tci_read_r32s(uint8_t **tb_ptr)
-{
-    int32_t value = tci_read_reg32s(**tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
+#ifdef CONFIG_SOFTMMU
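+/* Softmmu fast path: a TLB hit loads or stores directly through the
+   host addend; a miss defers to the out-of-line mmu helper.  */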
+# define qemu_ld(A, MMU, BIT, FAST, SLOW)                                    \
+    ({ target_ulong adr = A; uint##BIT##_t ret;                              \
+       target_ulong adr_mask = TARGET_PAGE_MASK | ((BIT / 8) - 1);           \
+       uintptr_t tlb_idx = (adr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);   \
+       const CPUTLBEntry *tlb = &env->tlb_table[MMU][tlb_idx];               \
+       if (likely((adr & adr_mask) == tlb->addr_read)) {                     \
+           ret = FAST((void *)((uintptr_t)adr + tlb->addend));               \
+       } else {                                                              \
+           ret = SLOW(env, adr, MMU, (uintptr_t)pc);                         \
+       }                                                                     \
+       ret; })
+
+# define qemu_ldub(A,M)    qemu_ld(A,M, 8, ldub_p, helper_ret_ldub_mmu)
+# define qemu_lduw_le(A,M) qemu_ld(A,M, 16, le16_to_cpup, helper_le_lduw_mmu)
+# define qemu_lduw_be(A,M) qemu_ld(A,M, 16, be16_to_cpup, helper_be_lduw_mmu)
+# define qemu_ldul_le(A,M) qemu_ld(A,M, 32, le32_to_cpup, helper_le_ldul_mmu)
+# define qemu_ldul_be(A,M) qemu_ld(A,M, 32, be32_to_cpup, helper_be_ldul_mmu)
+# define qemu_ldq_le(A,M)  qemu_ld(A,M, 64, le64_to_cpup, helper_le_ldq_mmu)
+# define qemu_ldq_be(A,M)  qemu_ld(A,M, 64, be64_to_cpup, helper_be_ldq_mmu)
+
+# define qemu_st(V, A, MMU, BIT, FAST, SLOW)                                 \
+    ({ target_ulong adr = A;                                                 \
+       target_ulong adr_mask = TARGET_PAGE_MASK | ((BIT / 8) - 1);           \
+       uintptr_t tlb_idx = (adr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);   \
+       const CPUTLBEntry *tlb = &env->tlb_table[MMU][tlb_idx];               \
+       if (likely((adr & adr_mask) == tlb->addr_write)) {                    \
+           FAST((void *)((uintptr_t)adr + tlb->addend), V);                  \
+       } else {                                                              \
+           SLOW(env, adr, V, MMU, (uintptr_t)pc);                            \
+       }                                                                     \
+       (void)0; })
+
+# define qemu_stb(V,A,M)    qemu_st(V,A,M,  8, stb_p, helper_ret_stb_mmu)
+# define qemu_stw_le(V,A,M) qemu_st(V,A,M, 16, cpu_to_le16w, helper_le_stw_mmu)
+# define qemu_stw_be(V,A,M) qemu_st(V,A,M, 16, cpu_to_be16w, helper_be_stw_mmu)
+# define qemu_stl_le(V,A,M) qemu_st(V,A,M, 32, cpu_to_le32w, helper_le_stl_mmu)
+# define qemu_stl_be(V,A,M) qemu_st(V,A,M, 32, cpu_to_be32w, helper_be_stl_mmu)
+# define qemu_stq_le(V,A,M) qemu_st(V,A,M, 64, cpu_to_le64w, helper_le_stq_mmu)
+# define qemu_stq_be(V,A,M) qemu_st(V,A,M, 64, cpu_to_be64w, helper_be_stq_mmu)
+#else
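+/* User-mode: guest addresses map directly to host via g2h().  */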
 
-/* Read indexed register (64 bit) from bytecode. */
-static uint64_t tci_read_r64(uint8_t **tb_ptr)
-{
-    uint64_t value = tci_read_reg64(**tb_ptr);
-    *tb_ptr += 1;
-    return value;
-}
+# define qemu_ldub(A,M)     ldub_p(g2h((target_ulong)A))
+# define qemu_lduw_le(A,M)  lduw_le_p(g2h((target_ulong)A))
+# define qemu_lduw_be(A,M)  lduw_be_p(g2h((target_ulong)A))
+# define qemu_ldul_le(A,M)  (uint32_t)ldl_le_p(g2h((target_ulong)A))
+# define qemu_ldul_be(A,M)  (uint32_t)ldl_be_p(g2h((target_ulong)A))
+# define qemu_ldq_le(A,M)   ldq_le_p(g2h((target_ulong)A))
+# define qemu_ldq_be(A,M)   ldq_be_p(g2h((target_ulong)A))
+# define qemu_stb(V,A,M)    stb_p(g2h((target_ulong)A), V)
+# define qemu_stw_le(V,A,M) stw_le_p(g2h((target_ulong)A), V)
+# define qemu_stw_be(V,A,M) stw_be_p(g2h((target_ulong)A), V)
+# define qemu_stl_le(V,A,M) stl_le_p(g2h((target_ulong)A), V)
+# define qemu_stl_be(V,A,M) stl_be_p(g2h((target_ulong)A), V)
+# define qemu_stq_le(V,A,M) stq_le_p(g2h((target_ulong)A), V)
+# define qemu_stq_be(V,A,M) stq_be_p(g2h((target_ulong)A), V)
 #endif
 
-/* Read indexed register(s) with target address from bytecode. */
-static target_ulong tci_read_ulong(uint8_t **tb_ptr)
+static inline uint64_t concat4(uint32_t x, uint32_t y)
 {
-    target_ulong taddr = tci_read_r(tb_ptr);
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-    taddr += (uint64_t)tci_read_r(tb_ptr) << 32;
-#endif
-    return taddr;
+    return ((uint64_t)x << 32) | y;
 }
 
-/* Read indexed register or constant (native size) from bytecode. */
-static tcg_target_ulong tci_read_ri(uint8_t **tb_ptr)
+static inline uintptr_t concatp(uint32_t x, uint32_t y)
 {
-    tcg_target_ulong value;
-    TCGReg r = **tb_ptr;
-    *tb_ptr += 1;
-    if (r == TCG_CONST) {
-        value = tci_read_i(tb_ptr);
-    } else {
-        value = tci_read_reg(r);
+    if (sizeof(void *) == 8) {
+        return concat4(x, y);
     }
-    return value;
+    return y;
 }
 
-/* Read indexed register or constant (32 bit) from bytecode. */
-static uint32_t tci_read_ri32(uint8_t **tb_ptr)
-{
-    uint32_t value;
-    TCGReg r = **tb_ptr;
-    *tb_ptr += 1;
-    if (r == TCG_CONST) {
-        value = tci_read_i32(tb_ptr);
+#define MAX_CALL_ARGS  (TCG_STATIC_CALL_ARGS_SIZE / sizeof(uint64_t))
+
+/* Interpret the TCI bytecode in tb. */
+uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *pc8)
+{
+    uint64_t sp[MAX_CALL_ARGS + CPU_TEMP_BUF_NLONGS];
+    void *sp_slots[MAX_CALL_ARGS];
+    tcg_target_ulong regs[8];
+
+    int32_t *pc = (int32_t *)pc8;
+    tcg_target_ulong r, w, x, y;
+    intptr_t ri, wi, xi, yi;
+    bool cmp = false;
+    void *ptr;
+    uint64_t t64;
+    uint32_t insn;
+    TCIOp opc;
+
+    sp_slots[0] = NULL;
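+    /* Fixed registers: vp (regs[6]) holds the CPU env pointer,
+       sp (regs[7]) the interpreter's stack.  */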
+    regs[6] = (uintptr_t)env;
+    regs[7] = (uintptr_t)sp;
+    goto next;
+
+ output_rw:
+    assert(wi < 6);
+    regs[wi] = w;
+ output_r:
+    assert(ri < 6);
+    regs[ri] = r;
+ next:
+    insn = *pc++;
+
+    opc = extract32(insn, POS_OP, LEN_OP);
+    ri  = extract32(insn, POS_R, LEN_R);
+    wi  = extract32(insn, POS_W, LEN_W);
+    xi  = extract32(insn, POS_X, LEN_X);
+    yi  = extract32(insn, POS_Y, LEN_Y);
+
+    w = regs[wi];
+
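+    /* X and Y operands: 0-7 name a register, 8 consumes a 32-bit
+       immediate from the next insn word, anything larger is a small
+       inline constant biased by BIAS_X/BIAS_Y.  */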
+    if (likely(xi < 8)) {
+        x = regs[xi];
+    } else if (xi == 8) {
+        x = *pc++;
     } else {
-        value = tci_read_reg32(r);
+        x = xi - BIAS_X;
     }
-    return value;
-}
 
-#if TCG_TARGET_REG_BITS == 32
-/* Read two indexed registers or constants (2 * 32 bit) from bytecode. */
-static uint64_t tci_read_ri64(uint8_t **tb_ptr)
-{
-    uint32_t low = tci_read_ri32(tb_ptr);
-    return tci_uint64(tci_read_ri32(tb_ptr), low);
-}
-#elif TCG_TARGET_REG_BITS == 64
-/* Read indexed register or constant (64 bit) from bytecode. */
-static uint64_t tci_read_ri64(uint8_t **tb_ptr)
-{
-    uint64_t value;
-    TCGReg r = **tb_ptr;
-    *tb_ptr += 1;
-    if (r == TCG_CONST) {
-        value = tci_read_i64(tb_ptr);
+    if (likely(yi < 8)) {
+        y = regs[yi];
+    } else if (yi == 8) {
+        y = *pc++;
     } else {
-        value = tci_read_reg64(r);
+        y = yi - BIAS_Y;
     }
-    return value;
-}
-#endif
 
-static tcg_target_ulong tci_read_label(uint8_t **tb_ptr)
-{
-    tcg_target_ulong label = tci_read_i(tb_ptr);
-    assert(label != 0);
-    return label;
-}
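+    /* Precompute w + y as a host pointer for the plain ld/st cases.  */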
+    ptr = (void *)((uintptr_t)w + (uintptr_t)y);
+
+    switch (opc) {
+    /*
+     * Normal binary operations
+     */
+    case TCI_add:
+        r = x + y;
+        goto output_r;
+    case TCI_sub:
+        r = x - y;
+        goto output_r;
+    case TCI_mul:
+        r = x * y;
+        goto output_r;
+    case TCI_divu:
+        r = x / y;
+        goto output_r;
+    case TCI_remu:
+        r = x % y;
+        goto output_r;
+    case TCI_divs:
+        r = (tcg_target_long)x / (tcg_target_long)y;
+        goto output_r;
+    case TCI_rems:
+        r = (tcg_target_long)x % (tcg_target_long)y;
+        goto output_r;
+    case TCI_and:
+        r = x & y;
+        goto output_r;
+    case TCI_ior:
+        r = x | y;
+        goto output_r;
+    case TCI_xor:
+        r = x ^ y;
+        goto output_r;
+    case TCI_andc:
+        r = x & ~y;
+        goto output_r;
+    case TCI_iorc:
+        r = x | ~y;
+        goto output_r;
+    case TCI_xorc:
+        r = x ^ ~y;
+        goto output_r;
+    case TCI_nand:
+        r = ~(x & y);
+        goto output_r;
+    case TCI_nior:
+        r = ~(x | y);
+        goto output_r;
+    case TCI_shl:
+        r = x << (y & (TCG_TARGET_REG_BITS - 1));
+        goto output_r;
+    case TCI_shr4:
+        r = (uint32_t)x >> (y & 31);
+        goto output_r;
+    case TCI_sar4:
+        r = (int32_t)x >> (y & 31);
+        goto output_r;
+    case TCI_rol4:
+        r = rol32(x, y & 31);
+        goto output_r;
+    case TCI_ror4:
+        r = ror32(x, y & 31);
+        goto output_r;
+    case TCI_movc:
+        r = (cmp ? x : y);
+        goto output_r;
+#if TCG_TARGET_REG_BITS == 64
+    case TCI_shr8:
+        r = x >> (y & 63);
+        goto output_r;
+    case TCI_sar8:
+        r = (int64_t)x >> (y & 63);
+        goto output_r;
+    case TCI_rol8:
+        r = rol64(x, y & 63);
+        goto output_r;
+    case TCI_ror8:
+        r = ror64(x, y & 63);
+        goto output_r;
+    case TCI_concat4:
+        r = concat4(x, y);
+        goto output_r;
+#endif /* 64 */
+
+    /*
+     * Comparison operations
+     */
+    case TCI_cmp4eq:
+        cmp = ((int32_t)x == (int32_t)y);
+        goto next;
+    case TCI_cmp4ne:
+        cmp = ((int32_t)x != (int32_t)y);
+        goto next;
+    case TCI_cmp4lt:
+        cmp = ((int32_t)x < (int32_t)y);
+        goto next;
+    case TCI_cmp4le:
+        cmp = ((int32_t)x <= (int32_t)y);
+        goto next;
+    case TCI_cmp4gt:
+        cmp = ((int32_t)x > (int32_t)y);
+        goto next;
+    case TCI_cmp4ge:
+        cmp = ((int32_t)x >= (int32_t)y);
+        goto next;
+    case TCI_cmp4ltu:
+        cmp = ((uint32_t)x < (uint32_t)y);
+        goto next;
+    case TCI_cmp4leu:
+        cmp = ((uint32_t)x <= (uint32_t)y);
+        goto next;
+    case TCI_cmp4gtu:
+        cmp = ((uint32_t)x > (uint32_t)y);
+        goto next;
+    case TCI_cmp4geu:
+        cmp = ((uint32_t)x >= (uint32_t)y);
+        goto next;
+#if TCG_TARGET_REG_BITS == 64
+    case TCI_cmp8eq:
+        cmp = ((int64_t)x == (int64_t)y);
+        goto next;
+    case TCI_cmp8ne:
+        cmp = ((int64_t)x != (int64_t)y);
+        goto next;
+    case TCI_cmp8lt:
+        cmp = ((int64_t)x < (int64_t)y);
+        goto next;
+    case TCI_cmp8le:
+        cmp = ((int64_t)x <= (int64_t)y);
+        goto next;
+    case TCI_cmp8gt:
+        cmp = ((int64_t)x > (int64_t)y);
+        goto next;
+    case TCI_cmp8ge:
+        cmp = ((int64_t)x >= (int64_t)y);
+        goto next;
+    case TCI_cmp8ltu:
+        cmp = ((uint64_t)x < (uint64_t)y);
+        goto next;
+    case TCI_cmp8leu:
+        cmp = ((uint64_t)x <= (uint64_t)y);
+        goto next;
+    case TCI_cmp8gtu:
+        cmp = ((uint64_t)x > (uint64_t)y);
+        goto next;
+    case TCI_cmp8geu:
+        cmp = ((uint64_t)x >= (uint64_t)y);
+        goto next;
+#endif /* 64 */
+
+    /*
+     * Unary operations
+     */
+
+    case TCI_sxt1:
+        r = (int8_t)y;
+        goto output_r;
+    case TCI_sxt2:
+        r = (int16_t)y;
+        goto output_r;
+    case TCI_sxt4:
+        r = (int32_t)y;
+        goto output_r;
+    case TCI_zxt1:
+        r = (uint8_t)y;
+        goto output_r;
+    case TCI_zxt2:
+        r = (uint16_t)y;
+        goto output_r;
+    case TCI_zxt4:
+        r = (uint32_t)y;
+        goto output_r;
+    case TCI_bswap2:
+        r = bswap16(y);
+        goto output_r;
+    case TCI_bswap4:
+        r = bswap32(y);
+        goto output_r;
+    case TCI_bswap8:
+        r = bswap64(y);
+        goto output_r;
+
+    /*
+     * Zero-ary operation
+     */
+
+    case TCI_setc:
+        r = cmp;
+        goto output_r;
+
+    /*
+     * Trinary operation
+     */
+
+    case TCI_deposit:
+        {
+            int pos = y >> 6;
+            int len = y & 0x3f;
+            if (TCG_TARGET_REG_BITS == 32) {
+                r = deposit32(regs[wi], pos, len, x);
+            } else {
+                r = deposit64(regs[wi], pos, len, x);
+            }
+        }
+        goto output_r;
+
+    /*
+     * QEMU store operations
+     */
+
+    case TCI_qst1:
+        qemu_stb(x, w, y);
+        goto next;
+    case TCI_qst2_le:
+        qemu_stw_le(x, w, y);
+        goto next;
+    case TCI_qst2_be:
+        qemu_stw_be(x, w, y);
+        goto next;
+    case TCI_qst4_le:
+        qemu_stl_le(x, w, y);
+        goto next;
+    case TCI_qst4_be:
+        qemu_stl_be(x, w, y);
+        goto next;
+    case TCI_qst8_le:
+        if (TCG_TARGET_REG_BITS == 64) {
+            qemu_stq_le(x, w, y);
+        } else {
+            t64 = concat4(regs[ri], x);
+            qemu_stq_le(t64, w, y);
+        }
+        goto next;
+    case TCI_qst8_be:
+        if (TCG_TARGET_REG_BITS == 64) {
+            qemu_stq_be(x, w, y);
+        } else {
+            t64 = concat4(regs[ri], x);
+            qemu_stq_be(t64, w, y);
+        }
+        goto next;
+
+    /*
+     * QEMU load operations
+     */
+
+    case TCI_qld1u:
+        r = qemu_ldub(x, y);
+        goto output_r;
+    case TCI_qld1s:
+        r = (int8_t)qemu_ldub(x, y);
+        goto output_r;
+    case TCI_qld2u_le:
+        r = qemu_lduw_le(x, y);
+        goto output_r;
+    case TCI_qld2u_be:
+        r = qemu_lduw_be(x, y);
+        goto output_r;
+    case TCI_qld2s_le:
+        r = (int16_t)qemu_lduw_le(x, y);
+        goto output_r;
+    case TCI_qld2s_be:
+        r = (int16_t)qemu_lduw_be(x, y);
+        goto output_r;
+    case TCI_qld4u_le:
+        r = qemu_ldul_le(x, y);
+        goto output_r;
+    case TCI_qld4u_be:
+        r = qemu_ldul_be(x, y);
+        goto output_r;
+    case TCI_qld4s_le:
+        r = (int32_t)qemu_ldul_le(x, y);
+        goto output_r;
+    case TCI_qld4s_be:
+        r = (int32_t)qemu_ldul_be(x, y);
+        goto output_r;
+    case TCI_qld8_le:
+        r = t64 = qemu_ldq_le(x, y);
+        if (TCG_TARGET_REG_BITS == 32) {
+            w = t64 >> 32;
+            goto output_rw;
+        }
+        goto output_r;
+    case TCI_qld8_be:
+        r = t64 = qemu_ldq_be(x, y);
+        if (TCG_TARGET_REG_BITS == 32) {
+            w = t64 >> 32;
+            goto output_rw;
+        }
+        goto output_r;
+
+    /*
+     * Normal stores - note that these must be naturally aligned
+     */
+
+    case TCI_st1:
+        *(uint8_t *)ptr = x;
+        goto next;
+    case TCI_st2:
+        *(uint16_t *)ptr = x;
+        goto next;
+    case TCI_st4:
+        *(uint32_t *)ptr = x;
+        goto next;
+#if TCG_TARGET_REG_BITS == 64
+    case TCI_st8:
+        *(uint64_t *)ptr = x;
+        goto next;
+#endif /* 64 */
+
+    /*
+     * Normal loads - note that these must be naturally aligned
+     */
+
+    case TCI_ld1u:
+        r = *(uint8_t *)ptr;
+        goto output_r;
+    case TCI_ld1s:
+        r = *(int8_t *)ptr;
+        goto output_r;
+    case TCI_ld2u:
+        r = *(uint16_t *)ptr;
+        goto output_r;
+    case TCI_ld2s:
+        r = *(int16_t *)ptr;
+        goto output_r;
+    case TCI_ld4u:
+        r = *(uint32_t *)ptr;
+        goto output_r;
+    case TCI_ld4s:
+        r = *(int32_t *)ptr;
+        goto output_r;
+#if TCG_TARGET_REG_BITS == 64
+    case TCI_ld8:
+        r = *(uint64_t *)ptr;
+        goto output_r;
+#endif /* 64 */
+
+    /*
+     * Control flow operations
+     */
+
+    case TCI_bc:
+        if (!cmp) {
+            goto next;
+        }
+        /* fall through */
+    case TCI_b:
+        pc += (ptrdiff_t)y;
+        goto next;
+
+    case TCI_exit:
+        return concatp(x, y);
+
+    case TCI_call0:
+    case TCI_call4:
+    case TCI_call8:
+    {
+        /* We're passed a pointer to the TCGHelperInfo, which contains
+           the function pointer followed by the ffi_cif pointer.  */
+        /* ??? Put the TCGHelperInfo struct somewhere it can be shared
+           between tcg.c and tci.c, but without pulling in <ffi.h> to
+           every user of tcg.h.  */
+        void **pptr = (void **)concatp(x, y);
+
+        /* Helper functions may need access to the "return address". */
+        tci_tb_ptr = (uintptr_t)pc;
+
+        /* Set up the ffi_avalue array once.  In tcg_gen_callN, we arranged
+           for every real argument to be "left-aligned" in each 64-bit
+           slot.  */
+        if (sp_slots[0] == NULL) {
+            int i;
+            for (i = 0; i < MAX_CALL_ARGS; ++i) {
+                sp_slots[i] = &sp[i];
+            }
+        }
 
-static bool tci_compare32(uint32_t u0, uint32_t u1, TCGCond condition)
-{
-    bool result = false;
-    int32_t i0 = u0;
-    int32_t i1 = u1;
-    switch (condition) {
-    case TCG_COND_EQ:
-        result = (u0 == u1);
-        break;
-    case TCG_COND_NE:
-        result = (u0 != u1);
-        break;
-    case TCG_COND_LT:
-        result = (i0 < i1);
-        break;
-    case TCG_COND_GE:
-        result = (i0 >= i1);
-        break;
-    case TCG_COND_LE:
-        result = (i0 <= i1);
-        break;
-    case TCG_COND_GT:
-        result = (i0 > i1);
-        break;
-    case TCG_COND_LTU:
-        result = (u0 < u1);
-        break;
-    case TCG_COND_GEU:
-        result = (u0 >= u1);
-        break;
-    case TCG_COND_LEU:
-        result = (u0 <= u1);
-        break;
-    case TCG_COND_GTU:
-        result = (u0 > u1);
-        break;
-    default:
-        TODO();
-    }
-    return result;
-}
+        /* Call the helper function.  Any result winds up "left-aligned"
+           in the sp[0] slot.  */
+        ffi_call(pptr[1], pptr[0], &sp[0], sp_slots);
 
-static bool tci_compare64(uint64_t u0, uint64_t u1, TCGCond condition)
-{
-    bool result = false;
-    int64_t i0 = u0;
-    int64_t i1 = u1;
-    switch (condition) {
-    case TCG_COND_EQ:
-        result = (u0 == u1);
-        break;
-    case TCG_COND_NE:
-        result = (u0 != u1);
-        break;
-    case TCG_COND_LT:
-        result = (i0 < i1);
-        break;
-    case TCG_COND_GE:
-        result = (i0 >= i1);
-        break;
-    case TCG_COND_LE:
-        result = (i0 <= i1);
-        break;
-    case TCG_COND_GT:
-        result = (i0 > i1);
-        break;
-    case TCG_COND_LTU:
-        result = (u0 < u1);
-        break;
-    case TCG_COND_GEU:
-        result = (u0 >= u1);
-        break;
-    case TCG_COND_LEU:
-        result = (u0 <= u1);
-        break;
-    case TCG_COND_GTU:
-        result = (u0 > u1);
-        break;
-    default:
-        TODO();
+        if (opc == TCI_call8) {
+            r = t64 = sp[0];
+            if (TCG_TARGET_REG_BITS == 32) {
+                w = t64 >> 32;
+                goto output_rw;
+            }
+        } else {
+            r = *(uint32_t *)sp;
+        }
+        goto output_r;
     }
-    return result;
-}
-
-/* Interpret pseudo code in tb. */
-uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr)
-{
-    long tcg_temps[CPU_TEMP_BUF_NLONGS];
-    uintptr_t sp_value = (uintptr_t)(tcg_temps + CPU_TEMP_BUF_NLONGS);
-    uintptr_t next_tb = 0;
 
-    tci_reg[TCG_AREG0] = (tcg_target_ulong)env;
-    tci_reg[TCG_REG_CALL_STACK] = sp_value;
-    assert(tb_ptr);
-
-    for (;;) {
-        TCGOpcode opc = tb_ptr[0];
-#if !defined(NDEBUG)
-        uint8_t op_size = tb_ptr[1];
-        uint8_t *old_code_ptr = tb_ptr;
-#endif
-        tcg_target_ulong t0;
-        tcg_target_ulong t1;
-        tcg_target_ulong t2;
-        tcg_target_ulong label;
-        TCGCond condition;
-        target_ulong taddr;
-#ifndef CONFIG_SOFTMMU
-        tcg_target_ulong host_addr;
-#endif
-        uint8_t tmp8;
-        uint16_t tmp16;
-        uint32_t tmp32;
-        uint64_t tmp64;
-#if TCG_TARGET_REG_BITS == 32
-        uint64_t v64;
-#endif
-
-#if defined(GETPC)
-        tci_tb_ptr = (uintptr_t)tb_ptr;
-#endif
+    /*
+     * Widening multiply operations
+     */
+
+    case TCI_mulu2:
+        if (TCG_TARGET_REG_BITS == 32) {
+            r = t64 = (uint64_t)(uint32_t)x * (uint32_t)y;
+            w = t64 >> 32;
+        } else {
+            uint64_t l, h;
+            mulu64(&l, &h, x, y);
+            r = l, w = h;
+        }
+        goto output_rw;
+    case TCI_muls2:
+        if (TCG_TARGET_REG_BITS == 32) {
+            r = t64 = (uint64_t)(int32_t)x * (int32_t)y;
+            w = t64 >> 32;
+        } else {
+            uint64_t l, h;
+            muls64(&l, &h, x, y);
+            r = l, w = h;
+        }
+        goto output_rw;
 
-        /* Skip opcode and size entry. */
-        tb_ptr += 2;
+    /*
+     * 2-input double-word operations.  The two inputs are w:r and y:x.
+     */
+    default:
+        r = regs[ri];
 
         switch (opc) {
-        case INDEX_op_end:
-        case INDEX_op_nop:
-            break;
-        case INDEX_op_nop1:
-        case INDEX_op_nop2:
-        case INDEX_op_nop3:
-        case INDEX_op_nopn:
-        case INDEX_op_discard:
-            TODO();
-            break;
-        case INDEX_op_set_label:
-            TODO();
-            break;
-        case INDEX_op_call:
-            t0 = tci_read_ri(&tb_ptr);
-#if TCG_TARGET_REG_BITS == 32
-            tmp64 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0),
-                                          tci_read_reg(TCG_REG_R1),
-                                          tci_read_reg(TCG_REG_R2),
-                                          tci_read_reg(TCG_REG_R3),
-                                          tci_read_reg(TCG_REG_R5),
-                                          tci_read_reg(TCG_REG_R6),
-                                          tci_read_reg(TCG_REG_R7),
-                                          tci_read_reg(TCG_REG_R8),
-                                          tci_read_reg(TCG_REG_R9),
-                                          tci_read_reg(TCG_REG_R10));
-            tci_write_reg(TCG_REG_R0, tmp64);
-            tci_write_reg(TCG_REG_R1, tmp64 >> 32);
-#else
-            tmp64 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0),
-                                          tci_read_reg(TCG_REG_R1),
-                                          tci_read_reg(TCG_REG_R2),
-                                          tci_read_reg(TCG_REG_R3),
-                                          tci_read_reg(TCG_REG_R5));
-            tci_write_reg(TCG_REG_R0, tmp64);
-#endif
-            break;
-        case INDEX_op_br:
-            label = tci_read_label(&tb_ptr);
-            assert(tb_ptr == old_code_ptr + op_size);
-            tb_ptr = (uint8_t *)label;
-            continue;
-        case INDEX_op_setcond_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            condition = *tb_ptr++;
-            tci_write_reg32(t0, tci_compare32(t1, t2, condition));
-            break;
-#if TCG_TARGET_REG_BITS == 32
-        case INDEX_op_setcond2_i32:
-            t0 = *tb_ptr++;
-            tmp64 = tci_read_r64(&tb_ptr);
-            v64 = tci_read_ri64(&tb_ptr);
-            condition = *tb_ptr++;
-            tci_write_reg32(t0, tci_compare64(tmp64, v64, condition));
-            break;
-#elif TCG_TARGET_REG_BITS == 64
-        case INDEX_op_setcond_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            condition = *tb_ptr++;
-            tci_write_reg64(t0, tci_compare64(t1, t2, condition));
-            break;
-#endif
-        case INDEX_op_mov_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(&tb_ptr);
-            tci_write_reg32(t0, t1);
-            break;
-        case INDEX_op_movi_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_i32(&tb_ptr);
-            tci_write_reg32(t0, t1);
-            break;
-
-            /* Load/store operations (32 bit). */
-
-        case INDEX_op_ld8u_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            tci_write_reg8(t0, *(uint8_t *)(t1 + t2));
-            break;
-        case INDEX_op_ld8s_i32:
-        case INDEX_op_ld16u_i32:
-            TODO();
-            break;
-        case INDEX_op_ld16s_i32:
-            TODO();
-            break;
-        case INDEX_op_ld_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            tci_write_reg32(t0, *(uint32_t *)(t1 + t2));
-            break;
-        case INDEX_op_st8_i32:
-            t0 = tci_read_r8(&tb_ptr);
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            *(uint8_t *)(t1 + t2) = t0;
-            break;
-        case INDEX_op_st16_i32:
-            t0 = tci_read_r16(&tb_ptr);
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            *(uint16_t *)(t1 + t2) = t0;
-            break;
-        case INDEX_op_st_i32:
-            t0 = tci_read_r32(&tb_ptr);
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            assert(t1 != sp_value || (int32_t)t2 < 0);
-            *(uint32_t *)(t1 + t2) = t0;
-            break;
-
-            /* Arithmetic operations (32 bit). */
-
-        case INDEX_op_add_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 + t2);
-            break;
-        case INDEX_op_sub_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 - t2);
-            break;
-        case INDEX_op_mul_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 * t2);
-            break;
-#if TCG_TARGET_HAS_div_i32
-        case INDEX_op_div_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, (int32_t)t1 / (int32_t)t2);
-            break;
-        case INDEX_op_divu_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 / t2);
-            break;
-        case INDEX_op_rem_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, (int32_t)t1 % (int32_t)t2);
-            break;
-        case INDEX_op_remu_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 % t2);
-            break;
-#elif TCG_TARGET_HAS_div2_i32
-        case INDEX_op_div2_i32:
-        case INDEX_op_divu2_i32:
-            TODO();
-            break;
-#endif
-        case INDEX_op_and_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 & t2);
-            break;
-        case INDEX_op_or_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 | t2);
-            break;
-        case INDEX_op_xor_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 ^ t2);
-            break;
-
-            /* Shift/rotate operations (32 bit). */
-
-        case INDEX_op_shl_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 << (t2 & 31));
-            break;
-        case INDEX_op_shr_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, t1 >> (t2 & 31));
-            break;
-        case INDEX_op_sar_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, ((int32_t)t1 >> (t2 & 31)));
-            break;
-#if TCG_TARGET_HAS_rot_i32
-        case INDEX_op_rotl_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, rol32(t1, t2 & 31));
-            break;
-        case INDEX_op_rotr_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri32(&tb_ptr);
-            t2 = tci_read_ri32(&tb_ptr);
-            tci_write_reg32(t0, ror32(t1, t2 & 31));
-            break;
-#endif
-#if TCG_TARGET_HAS_deposit_i32
-        case INDEX_op_deposit_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(&tb_ptr);
-            t2 = tci_read_r32(&tb_ptr);
-            tmp16 = *tb_ptr++;
-            tmp8 = *tb_ptr++;
-            tmp32 = (((1 << tmp8) - 1) << tmp16);
-            tci_write_reg32(t0, (t1 & ~tmp32) | ((t2 << tmp16) & tmp32));
-            break;
-#endif
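
The deposit case above forms its mask from the bit position (tmp16) and field length (tmp8). A standalone sketch of the same insertion, with a guard for the full-width case that a plain "1 << len" cannot express:

    #include <stdint.h>

    /* Sketch: insert the low LEN bits of VAL into DST at bit POS.
       Assumes 0 < len <= 32 and pos + len <= 32, as the TCG deposit
       constraints require. */
    static uint32_t deposit32_sketch(uint32_t dst, unsigned pos,
                                     unsigned len, uint32_t val)
    {
        uint32_t mask = (len == 32 ? ~0u : (1u << len) - 1u) << pos;
        return (dst & ~mask) | ((val << pos) & mask);
    }
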
-        case INDEX_op_brcond_i32:
-            t0 = tci_read_r32(&tb_ptr);
-            t1 = tci_read_ri32(&tb_ptr);
-            condition = *tb_ptr++;
-            label = tci_read_label(&tb_ptr);
-            if (tci_compare32(t0, t1, condition)) {
-                assert(tb_ptr == old_code_ptr + op_size);
-                tb_ptr = (uint8_t *)label;
-                continue;
-            }
-            break;
 #if TCG_TARGET_REG_BITS == 32
-        case INDEX_op_add2_i32:
-            t0 = *tb_ptr++;
-            t1 = *tb_ptr++;
-            tmp64 = tci_read_r64(&tb_ptr);
-            tmp64 += tci_read_r64(&tb_ptr);
-            tci_write_reg64(t1, t0, tmp64);
-            break;
-        case INDEX_op_sub2_i32:
-            t0 = *tb_ptr++;
-            t1 = *tb_ptr++;
-            tmp64 = tci_read_r64(&tb_ptr);
-            tmp64 -= tci_read_r64(&tb_ptr);
-            tci_write_reg64(t1, t0, tmp64);
-            break;
-        case INDEX_op_brcond2_i32:
-            tmp64 = tci_read_r64(&tb_ptr);
-            v64 = tci_read_ri64(&tb_ptr);
-            condition = *tb_ptr++;
-            label = tci_read_label(&tb_ptr);
-            if (tci_compare64(tmp64, v64, condition)) {
-                assert(tb_ptr == old_code_ptr + op_size);
-                tb_ptr = (uint8_t *)label;
-                continue;
-            }
-            break;
-        case INDEX_op_mulu2_i32:
-            t0 = *tb_ptr++;
-            t1 = *tb_ptr++;
-            t2 = tci_read_r32(&tb_ptr);
-            tmp64 = tci_read_r32(&tb_ptr);
-            tci_write_reg64(t1, t0, t2 * tmp64);
-            break;
-#endif /* TCG_TARGET_REG_BITS == 32 */
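
The mulu2 case above widens two 32-bit registers into a 64-bit product and writes it back as a register pair. A sketch of the widening multiply, with hypothetical names:

    #include <stdint.h>

    /* Sketch: unsigned widening multiply; promoting one operand makes
       the multiplication itself 64-bit wide. */
    static void mulu2_sketch(uint32_t a, uint32_t b,
                             uint32_t *lo, uint32_t *hi)
    {
        uint64_t prod = (uint64_t)a * b;
        *lo = (uint32_t)prod;
        *hi = (uint32_t)(prod >> 32);
    }
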
-#if TCG_TARGET_HAS_ext8s_i32
-        case INDEX_op_ext8s_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r8s(&tb_ptr);
-            tci_write_reg32(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext16s_i32
-        case INDEX_op_ext16s_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16s(&tb_ptr);
-            tci_write_reg32(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext8u_i32
-        case INDEX_op_ext8u_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r8(&tb_ptr);
-            tci_write_reg32(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext16u_i32
-        case INDEX_op_ext16u_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16(&tb_ptr);
-            tci_write_reg32(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_bswap16_i32
-        case INDEX_op_bswap16_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16(&tb_ptr);
-            tci_write_reg32(t0, bswap16(t1));
-            break;
-#endif
-#if TCG_TARGET_HAS_bswap32_i32
-        case INDEX_op_bswap32_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(&tb_ptr);
-            tci_write_reg32(t0, bswap32(t1));
-            break;
-#endif
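
bswap16/bswap32 above come from QEMU's byte-swap helpers; a freestanding 32-bit equivalent for reference:

    #include <stdint.h>

    /* Sketch: reverse the four bytes of a 32-bit word with shifts and
       masks -- compilers typically lower this to a single bswap insn. */
    static uint32_t bswap32_sketch(uint32_t x)
    {
        return  (x >> 24)
             | ((x >>  8) & 0x0000ff00u)
             | ((x <<  8) & 0x00ff0000u)
             |  (x << 24);
    }
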
-#if TCG_TARGET_HAS_not_i32
-        case INDEX_op_not_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(&tb_ptr);
-            tci_write_reg32(t0, ~t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_neg_i32
-        case INDEX_op_neg_i32:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(&tb_ptr);
-            tci_write_reg32(t0, -t1);
-            break;
-#endif
-#if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_mov_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(&tb_ptr);
-            tci_write_reg64(t0, t1);
-            break;
-        case INDEX_op_movi_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_i64(&tb_ptr);
-            tci_write_reg64(t0, t1);
-            break;
-
-            /* Load/store operations (64 bit). */
-
-        case INDEX_op_ld8u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            tci_write_reg8(t0, *(uint8_t *)(t1 + t2));
-            break;
-        case INDEX_op_ld8s_i64:
-        case INDEX_op_ld16u_i64:
-        case INDEX_op_ld16s_i64:
-            TODO();
-            break;
-        case INDEX_op_ld32u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            tci_write_reg32(t0, *(uint32_t *)(t1 + t2));
-            break;
-        case INDEX_op_ld32s_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            tci_write_reg32s(t0, *(int32_t *)(t1 + t2));
-            break;
-        case INDEX_op_ld_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            tci_write_reg64(t0, *(uint64_t *)(t1 + t2));
-            break;
-        case INDEX_op_st8_i64:
-            t0 = tci_read_r8(&tb_ptr);
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            *(uint8_t *)(t1 + t2) = t0;
-            break;
-        case INDEX_op_st16_i64:
-            t0 = tci_read_r16(&tb_ptr);
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            *(uint16_t *)(t1 + t2) = t0;
-            break;
-        case INDEX_op_st32_i64:
-            t0 = tci_read_r32(&tb_ptr);
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            *(uint32_t *)(t1 + t2) = t0;
-            break;
-        case INDEX_op_st_i64:
-            t0 = tci_read_r64(&tb_ptr);
-            t1 = tci_read_r(&tb_ptr);
-            t2 = tci_read_s32(&tb_ptr);
-            assert(t1 != sp_value || (int32_t)t2 < 0);
-            *(uint64_t *)(t1 + t2) = t0;
-            break;
-
-            /* Arithmetic operations (64 bit). */
-
-        case INDEX_op_add_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 + t2);
-            break;
-        case INDEX_op_sub_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 - t2);
-            break;
-        case INDEX_op_mul_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 * t2);
-            break;
-#if TCG_TARGET_HAS_div_i64
-        case INDEX_op_div_i64:
-        case INDEX_op_divu_i64:
-        case INDEX_op_rem_i64:
-        case INDEX_op_remu_i64:
-            TODO();
-            break;
-#elif TCG_TARGET_HAS_div2_i64
-        case INDEX_op_div2_i64:
-        case INDEX_op_divu2_i64:
-            TODO();
-            break;
-#endif
-        case INDEX_op_and_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 & t2);
-            break;
-        case INDEX_op_or_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 | t2);
-            break;
-        case INDEX_op_xor_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 ^ t2);
-            break;
-
-            /* Shift/rotate operations (64 bit). */
-
-        case INDEX_op_shl_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 << (t2 & 63));
-            break;
-        case INDEX_op_shr_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, t1 >> (t2 & 63));
-            break;
-        case INDEX_op_sar_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, ((int64_t)t1 >> (t2 & 63)));
-            break;
-#if TCG_TARGET_HAS_rot_i64
-        case INDEX_op_rotl_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, rol64(t1, t2 & 63));
-            break;
-        case INDEX_op_rotr_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_ri64(&tb_ptr);
-            t2 = tci_read_ri64(&tb_ptr);
-            tci_write_reg64(t0, ror64(t1, t2 & 63));
-            break;
-#endif
-#if TCG_TARGET_HAS_deposit_i64
-        case INDEX_op_deposit_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(&tb_ptr);
-            t2 = tci_read_r64(&tb_ptr);
-            tmp16 = *tb_ptr++;
-            tmp8 = *tb_ptr++;
-            tmp64 = (((1ULL << tmp8) - 1) << tmp16);
-            tci_write_reg64(t0, (t1 & ~tmp64) | ((t2 << tmp16) & tmp64));
-            break;
-#endif
-        case INDEX_op_brcond_i64:
-            t0 = tci_read_r64(&tb_ptr);
-            t1 = tci_read_ri64(&tb_ptr);
-            condition = *tb_ptr++;
-            label = tci_read_label(&tb_ptr);
-            if (tci_compare64(t0, t1, condition)) {
-                assert(tb_ptr == old_code_ptr + op_size);
-                tb_ptr = (uint8_t *)label;
-                continue;
-            }
-            break;
-#if TCG_TARGET_HAS_ext8u_i64
-        case INDEX_op_ext8u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r8(&tb_ptr);
-            tci_write_reg64(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext8s_i64
-        case INDEX_op_ext8s_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r8s(&tb_ptr);
-            tci_write_reg64(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext16s_i64
-        case INDEX_op_ext16s_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16s(&tb_ptr);
-            tci_write_reg64(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext16u_i64
-        case INDEX_op_ext16u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16(&tb_ptr);
-            tci_write_reg64(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext32s_i64
-        case INDEX_op_ext32s_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32s(&tb_ptr);
-            tci_write_reg64(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_ext32u_i64
-        case INDEX_op_ext32u_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(&tb_ptr);
-            tci_write_reg64(t0, t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_bswap16_i64
-        case INDEX_op_bswap16_i64:
-            TODO();
-            t0 = *tb_ptr++;
-            t1 = tci_read_r16(&tb_ptr);
-            tci_write_reg64(t0, bswap16(t1));
-            break;
-#endif
-#if TCG_TARGET_HAS_bswap32_i64
-        case INDEX_op_bswap32_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r32(&tb_ptr);
-            tci_write_reg64(t0, bswap32(t1));
-            break;
-#endif
-#if TCG_TARGET_HAS_bswap64_i64
-        case INDEX_op_bswap64_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(&tb_ptr);
-            tci_write_reg64(t0, bswap64(t1));
-            break;
-#endif
-#if TCG_TARGET_HAS_not_i64
-        case INDEX_op_not_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(&tb_ptr);
-            tci_write_reg64(t0, ~t1);
-            break;
-#endif
-#if TCG_TARGET_HAS_neg_i64
-        case INDEX_op_neg_i64:
-            t0 = *tb_ptr++;
-            t1 = tci_read_r64(&tb_ptr);
-            tci_write_reg64(t0, -t1);
-            break;
-#endif
-#endif /* TCG_TARGET_REG_BITS == 64 */
-
-            /* QEMU specific operations. */
+        case TCI_cmppeq:
+            cmp = (r == y && w == x);
+            goto next;
+        case TCI_cmppne:
+            cmp = (r != y || w != x);
+            goto next;
+        case TCI_cmpplt:
+            cmp = ((int32_t)w < (int32_t)x || (w == x && r < y));
+            goto next;
+        case TCI_cmpple:
+            cmp = ((int32_t)w < (int32_t)x || (w == x && r <= y));
+            goto next;
+        case TCI_cmppgt:
+            cmp = ((int32_t)w > (int32_t)x || (w == x && r > y));
+            goto next;
+        case TCI_cmppge:
+            cmp = ((int32_t)w > (int32_t)x || (w == x && r >= y));
+            goto next;
+        case TCI_cmppltu:
+            cmp = (w < x || (w == x && r < y));
+            goto next;
+        case TCI_cmppleu:
+            cmp = (w < x || (w == x && r <= y));
+            goto next;
+        case TCI_cmppgtu:
+            cmp = (w > x || (w == x && r > y));
+            goto next;
+        case TCI_cmppgeu:
+            cmp = (w > x || (w == x && r >= y));
+            goto next;
+#endif /* 32 */
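
The pair comparisons above implement a 64-bit compare on a 32-bit host: the high words decide the result unless they are equal, in which case the (always unsigned) low words break the tie. A sketch of the signed less-than variant, with hypothetical parameter names:

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch: signed 64-bit "<" from 32-bit halves, mirroring
       TCI_cmpplt -- signed compare on the high halves, unsigned on
       the low halves when the high halves tie. */
    static bool cmpplt_sketch(uint32_t lo_a, int32_t hi_a,
                              uint32_t lo_b, int32_t hi_b)
    {
        return hi_a < hi_b || (hi_a == hi_b && lo_a < lo_b);
    }
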
+        case TCI_add2:
+            r += y;
+            w += x + (r < y);
+            goto output_rw;
+        case TCI_sub2:
+            w = w - x - (r < y);
+            r -= y;
+            goto output_rw;
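
add2 and sub2 recover the carry and borrow without needing a wider type: after "r += y" the low half wrapped iff the sum is smaller than the addend, and in sub2 a borrow is needed iff the low minuend is smaller than the low subtrahend, tested before r is modified. A sketch under those assumptions, shown at 32-bit half width:

    #include <stdint.h>

    /* Sketch: double-word add/sub with carry/borrow derived from
       unsigned comparisons, mirroring TCI_add2/TCI_sub2.  (*r, *w)
       and (y, x) are the (low, high) halves of two operands. */
    static void add2_sketch(uint32_t *r, uint32_t *w, uint32_t y, uint32_t x)
    {
        *r += y;
        *w += x + (*r < y);        /* carry iff the low half wrapped */
    }

    static void sub2_sketch(uint32_t *r, uint32_t *w, uint32_t y, uint32_t x)
    {
        *w = *w - x - (*r < y);    /* borrow iff low minuend < low subtrahend */
        *r -= y;
    }
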
 
-#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
-        case INDEX_op_debug_insn_start:
-            TODO();
-            break;
-#else
-        case INDEX_op_debug_insn_start:
-            TODO();
-            break;
-#endif
-        case INDEX_op_exit_tb:
-            next_tb = *(uint64_t *)tb_ptr;
-            goto exit;
-            break;
-        case INDEX_op_goto_tb:
-            t0 = tci_read_i32(&tb_ptr);
-            assert(tb_ptr == old_code_ptr + op_size);
-            tb_ptr += (int32_t)t0;
-            continue;
-        case INDEX_op_qemu_ld8u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp8 = helper_ldb_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp8 = *(uint8_t *)(host_addr + GUEST_BASE);
-#endif
-            tci_write_reg8(t0, tmp8);
-            break;
-        case INDEX_op_qemu_ld8s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp8 = helper_ldb_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp8 = *(uint8_t *)(host_addr + GUEST_BASE);
-#endif
-            tci_write_reg8s(t0, tmp8);
-            break;
-        case INDEX_op_qemu_ld16u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp16 = helper_ldw_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp16 = tswap16(*(uint16_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg16(t0, tmp16);
-            break;
-        case INDEX_op_qemu_ld16s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp16 = helper_ldw_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp16 = tswap16(*(uint16_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg16s(t0, tmp16);
-            break;
-#if TCG_TARGET_REG_BITS == 64
-        case INDEX_op_qemu_ld32u:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32(t0, tmp32);
-            break;
-        case INDEX_op_qemu_ld32s:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32s(t0, tmp32);
-            break;
-#endif /* TCG_TARGET_REG_BITS == 64 */
-        case INDEX_op_qemu_ld32:
-            t0 = *tb_ptr++;
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp32 = helper_ldl_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp32 = tswap32(*(uint32_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg32(t0, tmp32);
-            break;
-        case INDEX_op_qemu_ld64:
-            t0 = *tb_ptr++;
-#if TCG_TARGET_REG_BITS == 32
-            t1 = *tb_ptr++;
-#endif
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            tmp64 = helper_ldq_mmu(env, taddr, tci_read_i(&tb_ptr));
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            tmp64 = tswap64(*(uint64_t *)(host_addr + GUEST_BASE));
-#endif
-            tci_write_reg(t0, tmp64);
-#if TCG_TARGET_REG_BITS == 32
-            tci_write_reg(t1, tmp64 >> 32);
-#endif
-            break;
-        case INDEX_op_qemu_st8:
-            t0 = tci_read_r8(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stb_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint8_t *)(host_addr + GUEST_BASE) = t0;
-#endif
-            break;
-        case INDEX_op_qemu_st16:
-            t0 = tci_read_r16(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stw_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint16_t *)(host_addr + GUEST_BASE) = tswap16(t0);
-#endif
-            break;
-        case INDEX_op_qemu_st32:
-            t0 = tci_read_r32(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stl_mmu(env, taddr, t0, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint32_t *)(host_addr + GUEST_BASE) = tswap32(t0);
-#endif
-            break;
-        case INDEX_op_qemu_st64:
-            tmp64 = tci_read_r64(&tb_ptr);
-            taddr = tci_read_ulong(&tb_ptr);
-#ifdef CONFIG_SOFTMMU
-            t2 = tci_read_i(&tb_ptr);
-            helper_stq_mmu(env, taddr, tmp64, t2);
-#else
-            host_addr = (tcg_target_ulong)taddr;
-            *(uint64_t *)(host_addr + GUEST_BASE) = tswap64(tmp64);
-#endif
-            break;
         default:
-            TODO();
             break;
         }
-        assert(tb_ptr == old_code_ptr + op_size);
     }
-exit:
-    return next_tb;
+    /* Should have looped back via goto.  */
+    abort();
 }
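
The removed qemu_ld/st cases split on CONFIG_SOFTMMU: the softmmu build calls the ld/st helpers with the memory-index operand, while the user-mode build simply offsets the guest address by GUEST_BASE and byte-swaps with tswap as needed. A sketch of the user-mode 32-bit load path, assuming QEMU's internal headers for target_ulong and tswap32:

    /* Sketch: user-mode guest load, mirroring the removed
       INDEX_op_qemu_ld32 case.  guest_base stands in for GUEST_BASE;
       tswap32 is a no-op when guest and host endianness agree. */
    static uint32_t qemu_ld32_sketch(uintptr_t guest_base, target_ulong taddr)
    {
        uint32_t *host_addr = (uint32_t *)(guest_base + (uintptr_t)taddr);
        return tswap32(*host_addr);
    }
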
diff --git a/translate-all.c b/translate-all.c
index 5549a85..2a9a448 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -464,7 +464,10 @@ static inline PageDesc *page_find(tb_page_addr_t index)
 /* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
    indicated, this is constrained by the range of direct branches on the
    host cpu, as used by the TCG implementation of goto_tb.  */
-#if defined(__x86_64__)
+#if defined(CONFIG_TCG_INTERPRETER)
+  /* We have a +- 8GB range on the branches, but don't go overboard.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#elif defined(__x86_64__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
 #elif defined(__sparc__)
 # define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
-- 
1.9.0



