qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions


From: Thiemo Seufer
Subject: [Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions
Date: Tue, 17 Jun 2008 13:12:51 +0100
User-agent: Mutt/1.5.18 (2008-05-17)

Hello All,

I am currently trying to implement TCG versions of the MIPS [ls][dw][lr]
set of instructions. I believe I can't use a helper function for
load/store type instructions. The appended patch uses TCG directly, but
the resulting translation is excessively complicated. Is there a better
way to do this?


Thiemo


Index: qemu-work/target-mips/op.c
===================================================================
--- qemu-work.orig/target-mips/op.c     2008-06-16 07:32:12.000000000 +0100
+++ qemu-work/target-mips/op.c  2008-06-17 06:43:36.000000000 +0100
@@ -30,41 +30,6 @@
 #ifndef CALL_FROM_TB1
 #define CALL_FROM_TB1(func, arg0) func(arg0)
 #endif
-#ifndef CALL_FROM_TB1_CONST16
-#define CALL_FROM_TB1_CONST16(func, arg0) CALL_FROM_TB1(func, arg0)
-#endif
-#ifndef CALL_FROM_TB2
-#define CALL_FROM_TB2(func, arg0, arg1) func(arg0, arg1)
-#endif
-#ifndef CALL_FROM_TB2_CONST16
-#define CALL_FROM_TB2_CONST16(func, arg0, arg1)     \
-        CALL_FROM_TB2(func, arg0, arg1)
-#endif
-#ifndef CALL_FROM_TB3
-#define CALL_FROM_TB3(func, arg0, arg1, arg2) func(arg0, arg1, arg2)
-#endif
-#ifndef CALL_FROM_TB4
-#define CALL_FROM_TB4(func, arg0, arg1, arg2, arg3) \
-        func(arg0, arg1, arg2, arg3)
-#endif
-
-/* Load and store */
-#define MEMSUFFIX _raw
-#include "op_mem.c"
-#undef MEMSUFFIX
-#if !defined(CONFIG_USER_ONLY)
-#define MEMSUFFIX _user
-#include "op_mem.c"
-#undef MEMSUFFIX
-
-#define MEMSUFFIX _super
-#include "op_mem.c"
-#undef MEMSUFFIX
-
-#define MEMSUFFIX _kernel
-#include "op_mem.c"
-#undef MEMSUFFIX
-#endif
 
 /* 64 bits arithmetic */
 #if TARGET_LONG_BITS > HOST_LONG_BITS
Index: qemu-work/target-mips/op_mem.c
===================================================================
--- qemu-work.orig/target-mips/op_mem.c 2008-06-16 07:32:12.000000000 +0100
+++ /dev/null   1970-01-01 00:00:00.000000000 +0000
@@ -1,269 +0,0 @@
-/*
- *  MIPS emulation memory micro-operations for qemu.
- *
- *  Copyright (c) 2004-2005 Jocelyn Mayer
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-/* "half" load and stores.  We must do the memory access inline,
-   or fault handling won't work.  */
-
-#ifdef TARGET_WORDS_BIGENDIAN
-#define GET_LMASK(v) ((v) & 3)
-#define GET_OFFSET(addr, offset) (addr + (offset))
-#else
-#define GET_LMASK(v) (((v) & 3) ^ 3)
-#define GET_OFFSET(addr, offset) (addr - (offset))
-#endif
-
-void glue(op_lwl, MEMSUFFIX) (void)
-{
-    target_ulong tmp;
-
-    tmp = glue(ldub, MEMSUFFIX)(T0);
-    T1 = (T1 & 0x00FFFFFF) | (tmp << 24);
-
-    if (GET_LMASK(T0) <= 2) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 1));
-        T1 = (T1 & 0xFF00FFFF) | (tmp << 16);
-    }
-
-    if (GET_LMASK(T0) <= 1) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 2));
-        T1 = (T1 & 0xFFFF00FF) | (tmp << 8);
-    }
-
-    if (GET_LMASK(T0) == 0) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 3));
-        T1 = (T1 & 0xFFFFFF00) | tmp;
-    }
-    T1 = (int32_t)T1;
-    FORCE_RET();
-}
-
-void glue(op_lwr, MEMSUFFIX) (void)
-{
-    target_ulong tmp;
-
-    tmp = glue(ldub, MEMSUFFIX)(T0);
-    T1 = (T1 & 0xFFFFFF00) | tmp;
-
-    if (GET_LMASK(T0) >= 1) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -1));
-        T1 = (T1 & 0xFFFF00FF) | (tmp << 8);
-    }
-
-    if (GET_LMASK(T0) >= 2) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -2));
-        T1 = (T1 & 0xFF00FFFF) | (tmp << 16);
-    }
-
-    if (GET_LMASK(T0) == 3) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -3));
-        T1 = (T1 & 0x00FFFFFF) | (tmp << 24);
-    }
-    T1 = (int32_t)T1;
-    FORCE_RET();
-}
-
-void glue(op_swl, MEMSUFFIX) (void)
-{
-    glue(stb, MEMSUFFIX)(T0, (uint8_t)(T1 >> 24));
-
-    if (GET_LMASK(T0) <= 2)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 1), (uint8_t)(T1 >> 16));
-
-    if (GET_LMASK(T0) <= 1)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 2), (uint8_t)(T1 >> 8));
-
-    if (GET_LMASK(T0) == 0)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 3), (uint8_t)T1);
-
-    FORCE_RET();
-}
-
-void glue(op_swr, MEMSUFFIX) (void)
-{
-    glue(stb, MEMSUFFIX)(T0, (uint8_t)T1);
-
-    if (GET_LMASK(T0) >= 1)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -1), (uint8_t)(T1 >> 8));
-
-    if (GET_LMASK(T0) >= 2)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -2), (uint8_t)(T1 >> 16));
-
-    if (GET_LMASK(T0) == 3)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -3), (uint8_t)(T1 >> 24));
-
-    FORCE_RET();
-}
-
-#if defined(TARGET_MIPS64)
-/* "half" load and stores.  We must do the memory access inline,
-   or fault handling won't work.  */
-
-#ifdef TARGET_WORDS_BIGENDIAN
-#define GET_LMASK64(v) ((v) & 7)
-#else
-#define GET_LMASK64(v) (((v) & 7) ^ 7)
-#endif
-
-void glue(op_ldl, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-
-    tmp = glue(ldub, MEMSUFFIX)(T0);
-    T1 = (T1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
-
-    if (GET_LMASK64(T0) <= 6) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 1));
-        T1 = (T1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
-    }
-
-    if (GET_LMASK64(T0) <= 5) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 2));
-        T1 = (T1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
-    }
-
-    if (GET_LMASK64(T0) <= 4) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 3));
-        T1 = (T1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
-    }
-
-    if (GET_LMASK64(T0) <= 3) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 4));
-        T1 = (T1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
-    }
-
-    if (GET_LMASK64(T0) <= 2) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 5));
-        T1 = (T1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
-    }
-
-    if (GET_LMASK64(T0) <= 1) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 6));
-        T1 = (T1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
-    }
-
-    if (GET_LMASK64(T0) == 0) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 7));
-        T1 = (T1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
-    }
-
-    FORCE_RET();
-}
-
-void glue(op_ldr, MEMSUFFIX) (void)
-{
-    uint64_t tmp;
-
-    tmp = glue(ldub, MEMSUFFIX)(T0);
-    T1 = (T1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
-
-    if (GET_LMASK64(T0) >= 1) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -1));
-        T1 = (T1 & 0xFFFFFFFFFFFF00FFULL) | (tmp  << 8);
-    }
-
-    if (GET_LMASK64(T0) >= 2) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -2));
-        T1 = (T1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
-    }
-
-    if (GET_LMASK64(T0) >= 3) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -3));
-        T1 = (T1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
-    }
-
-    if (GET_LMASK64(T0) >= 4) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -4));
-        T1 = (T1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
-    }
-
-    if (GET_LMASK64(T0) >= 5) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -5));
-        T1 = (T1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
-    }
-
-    if (GET_LMASK64(T0) >= 6) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -6));
-        T1 = (T1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
-    }
-
-    if (GET_LMASK64(T0) == 7) {
-        tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -7));
-        T1 = (T1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
-    }
-
-    FORCE_RET();
-}
-
-void glue(op_sdl, MEMSUFFIX) (void)
-{
-    glue(stb, MEMSUFFIX)(T0, (uint8_t)(T1 >> 56));
-
-    if (GET_LMASK64(T0) <= 6)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 1), (uint8_t)(T1 >> 48));
-
-    if (GET_LMASK64(T0) <= 5)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 2), (uint8_t)(T1 >> 40));
-
-    if (GET_LMASK64(T0) <= 4)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 3), (uint8_t)(T1 >> 32));
-
-    if (GET_LMASK64(T0) <= 3)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 4), (uint8_t)(T1 >> 24));
-
-    if (GET_LMASK64(T0) <= 2)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 5), (uint8_t)(T1 >> 16));
-
-    if (GET_LMASK64(T0) <= 1)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 6), (uint8_t)(T1 >> 8));
-
-    if (GET_LMASK64(T0) <= 0)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 7), (uint8_t)T1);
-
-    FORCE_RET();
-}
-
-void glue(op_sdr, MEMSUFFIX) (void)
-{
-    glue(stb, MEMSUFFIX)(T0, (uint8_t)T1);
-
-    if (GET_LMASK64(T0) >= 1)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -1), (uint8_t)(T1 >> 8));
-
-    if (GET_LMASK64(T0) >= 2)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -2), (uint8_t)(T1 >> 16));
-
-    if (GET_LMASK64(T0) >= 3)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -3), (uint8_t)(T1 >> 24));
-
-    if (GET_LMASK64(T0) >= 4)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -4), (uint8_t)(T1 >> 32));
-
-    if (GET_LMASK64(T0) >= 5)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -5), (uint8_t)(T1 >> 40));
-
-    if (GET_LMASK64(T0) >= 6)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -6), (uint8_t)(T1 >> 48));
-
-    if (GET_LMASK64(T0) == 7)
-        glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -7), (uint8_t)(T1 >> 56));
-
-    FORCE_RET();
-}
-#endif /* TARGET_MIPS64 */
Index: qemu-work/target-mips/translate.c
===================================================================
--- qemu-work.orig/target-mips/translate.c      2008-06-17 06:43:05.000000000 +0100
+++ qemu-work/target-mips/translate.c   2008-06-17 06:43:36.000000000 +0100
@@ -930,37 +930,6 @@
 }
 
 /* load/store instructions. */
-#if defined(CONFIG_USER_ONLY)
-#define op_ldst(name)        gen_op_##name##_raw()
-#define OP_LD_TABLE(width)
-#define OP_ST_TABLE(width)
-#else
-#define op_ldst(name)        (*gen_op_##name[ctx->mem_idx])()
-#define OP_LD_TABLE(width)                                                    \
-static GenOpFunc *gen_op_l##width[] = {                                       \
-    &gen_op_l##width##_kernel,                                                \
-    &gen_op_l##width##_super,                                                 \
-    &gen_op_l##width##_user,                                                  \
-}
-#define OP_ST_TABLE(width)                                                    \
-static GenOpFunc *gen_op_s##width[] = {                                       \
-    &gen_op_s##width##_kernel,                                                \
-    &gen_op_s##width##_super,                                                 \
-    &gen_op_s##width##_user,                                                  \
-}
-#endif
-
-#if defined(TARGET_MIPS64)
-OP_LD_TABLE(dl);
-OP_LD_TABLE(dr);
-OP_ST_TABLE(dl);
-OP_ST_TABLE(dr);
-#endif
-OP_LD_TABLE(wl);
-OP_LD_TABLE(wr);
-OP_ST_TABLE(wl);
-OP_ST_TABLE(wr);
-
 #define OP_LD(insn,fname)                                        \
 void inline op_ldst_##insn(DisasContext *ctx)                    \
 {                                                                \
@@ -1032,6 +1001,486 @@
 #endif
 #undef OP_ST_ATOMIC
 
+/* "half" load and stores.  We must do the memory access inline,
+   or fault handling won't work.  */
+void inline get_lmask (TCGv ret, TCGv val) /* emit: ret = byte-lane index of address val */
+{
+    tcg_gen_andi_tl(ret, val, 3);   /* keep the two low address bits */
+#ifndef TARGET_WORDS_BIGENDIAN
+    tcg_gen_xori_tl(ret, ret, 3);   /* little-endian target: mirror the index (0<->3, 1<->2) */
+#endif
+}
+
+void inline get_offset (TCGv ret, TCGv val, target_ulong off) /* emit: ret = val +/- off by endianness */
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+    tcg_gen_addi_tl(ret, val, off);   /* big-endian target: ret = val + off */
+#else
+    tcg_gen_subi_tl(ret, val, off);   /* little-endian target: ret = val - off */
+#endif
+}
+
+void inline gen_lwl (DisasContext *ctx)
+{
+    TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+    int l3 = gen_new_label();
+    /* LWL: the byte at address T0 always replaces bits 31..24 of T1. */
+    get_lmask(r_mask, cpu_T[0]);
+    tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffff);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    /* Byte at get_offset(T0, 1) -> bits 23..16; skipped when r_mask > 2. */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l1);
+    get_offset(r_tmp2, cpu_T[0], 1);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffff);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l1);
+    /* Byte at get_offset(T0, 2) -> bits 15..8; skipped when r_mask > 1. */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l2);
+    get_offset(r_tmp2, cpu_T[0], 2);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ff);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l2);
+    /* Byte at get_offset(T0, 3) -> bits 7..0; loaded only when r_mask == 0. */
+    tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l3);
+    get_offset(r_tmp2, cpu_T[0], 3);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+    gen_set_label(l3);
+    /* Release the temporaries, then sign-extend the low 32 bits of T1. */
+    tcg_temp_free(r_mask);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+    tcg_temp_free(r_tmp3);
+    tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+}
+
+void inline gen_lwr (DisasContext *ctx)
+{
+    TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+    int l3 = gen_new_label();
+    /* LWR: the byte at address T0 always replaces bits 7..0 of T1. */
+    get_lmask(r_mask, cpu_T[0]);
+    tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+    /* Byte at get_offset(T0, -1) -> bits 15..8; skipped when r_mask < 1. */
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+    get_offset(r_tmp2, cpu_T[0], -1);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ff);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l1);
+    /* Byte at get_offset(T0, -2) -> bits 23..16; skipped when r_mask < 2. */
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+    get_offset(r_tmp2, cpu_T[0], -2);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffff);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l2);
+    /* Byte at get_offset(T0, -3) -> bits 31..24; loaded only when r_mask == 3. */
+    tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 3, l3);
+    get_offset(r_tmp2, cpu_T[0], -3);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffff);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l3);
+    /* Release the temporaries, then sign-extend the low 32 bits of T1. */
+    tcg_temp_free(r_mask);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+    tcg_temp_free(r_tmp3);
+    tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+}
+
+void inline gen_swl (DisasContext *ctx)
+{
+    TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+    int l3 = gen_new_label();
+    /* SWL: T1 >> 24 is always stored as a byte at address T0. */
+    get_lmask(r_mask, cpu_T[0]);
+    tcg_gen_shri_tl(r_tmp1, cpu_T[1], 24);
+    tcg_gen_qemu_st8(r_tmp1, cpu_T[0], ctx->mem_idx);
+    /* T1 >> 16 goes to get_offset(T0, 1); skipped when r_mask > 2. */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l1);
+    get_offset(r_tmp1, cpu_T[0], 1);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l1);
+    /* T1 >> 8 goes to get_offset(T0, 2); skipped when r_mask > 1. */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l2);
+    get_offset(r_tmp1, cpu_T[0], 2);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l2);
+    /* Unshifted T1 goes to get_offset(T0, 3); stored only when r_mask == 0. */
+    tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l3);
+    get_offset(r_tmp1, cpu_T[0], 3);
+    tcg_gen_qemu_st8(cpu_T[1], r_tmp1, ctx->mem_idx);
+    gen_set_label(l3);
+
+    tcg_temp_free(r_mask);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+}
+
+void inline gen_swr (DisasContext *ctx)
+{
+    TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+    int l3 = gen_new_label();
+    /* SWR: unshifted T1 is always stored as a byte at address T0. */
+    get_lmask(r_mask, cpu_T[0]);
+    tcg_gen_qemu_st8(cpu_T[1], cpu_T[0], ctx->mem_idx);
+    /* T1 >> 8 goes to get_offset(T0, -1); skipped when r_mask < 1. */
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+    get_offset(r_tmp1, cpu_T[0], -1);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l1);
+    /* T1 >> 16 goes to get_offset(T0, -2); skipped when r_mask < 2. */
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+    get_offset(r_tmp1, cpu_T[0], -2);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l2);
+    /* T1 >> 24 goes to get_offset(T0, -3); stored only when r_mask == 3. */
+    tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 3, l3);
+    get_offset(r_tmp1, cpu_T[0], -3);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l3);
+
+    tcg_temp_free(r_mask);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+}
+
+#if defined(TARGET_MIPS64)
+
+void inline get_lmask64 (TCGv ret, TCGv val) /* emit: ret = byte-lane index of address val (0..7) */
+{
+    tcg_gen_andi_tl(ret, val, 7);   /* keep the three low address bits */
+#ifndef TARGET_WORDS_BIGENDIAN
+    tcg_gen_xori_tl(ret, ret, 7);   /* little-endian target: mirror the index (0<->7, 1<->6, ...) */
+#endif
+}
+
+void inline gen_ldl (DisasContext *ctx)
+{
+    TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+    int l3 = gen_new_label();
+    int l4 = gen_new_label();
+    int l5 = gen_new_label();
+    int l6 = gen_new_label();
+    int l7 = gen_new_label();
+    /* LDL: the byte at address T0 always replaces bits 63..56 of T1. */
+    get_lmask64(r_mask, cpu_T[0]);
+    tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 56);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffffffffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    /* For k = 1..6: byte at get_offset(T0, k) is merged at shift 56 - 8k, */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 6, l1);
+    get_offset(r_tmp2, cpu_T[0], 1);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 48);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffffffffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l1);
+    /* and the whole step is branched over when r_mask > 7 - k. */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 5, l2);
+    get_offset(r_tmp2, cpu_T[0], 2);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 40);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ffffffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l2);
+
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 4, l3);
+    get_offset(r_tmp2, cpu_T[0], 3);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 32);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00ffffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l3);
+
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 3, l4);
+    get_offset(r_tmp2, cpu_T[0], 4);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffff00ffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l4);
+
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l5);
+    get_offset(r_tmp2, cpu_T[0], 5);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffff00ffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l5);
+
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l6);
+    get_offset(r_tmp2, cpu_T[0], 6);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffff00ffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l6);
+    /* Byte at get_offset(T0, 7) -> bits 7..0; loaded only when r_mask == 0. */
+    tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l7);
+    get_offset(r_tmp2, cpu_T[0], 7);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffffff00ULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+    gen_set_label(l7);
+
+    tcg_temp_free(r_mask);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+    tcg_temp_free(r_tmp3);
+}
+
+void inline gen_ldr (DisasContext *ctx)
+{
+    TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+    int l3 = gen_new_label();
+    int l4 = gen_new_label();
+    int l5 = gen_new_label();
+    int l6 = gen_new_label();
+    int l7 = gen_new_label();
+    /* LDR: the byte at address T0 always replaces bits 7..0 of T1. */
+    get_lmask64(r_mask, cpu_T[0]);
+    tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+    tcg_gen_andi_tl(r_tmp2, cpu_T[1], 0xffffffffffffff00ULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp2, r_tmp1);
+    /* For k = 1..6: byte at get_offset(T0, -k) is merged at shift 8k, */
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+    get_offset(r_tmp2, cpu_T[0], -1);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffff00ffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l1);
+    /* and the whole step is branched over when r_mask < k. */
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+    get_offset(r_tmp2, cpu_T[0], -2);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffff00ffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l2);
+
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 3, l3);
+    get_offset(r_tmp2, cpu_T[0], -3);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffff00ffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l3);
+
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 4, l4);
+    get_offset(r_tmp2, cpu_T[0], -4);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 32);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00ffffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l4);
+
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 5, l5);
+    get_offset(r_tmp2, cpu_T[0], -5);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 40);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ffffffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l5);
+
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 6, l6);
+    get_offset(r_tmp2, cpu_T[0], -6);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 48);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffffffffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l6);
+    /* Byte at get_offset(T0, -7) -> bits 63..56; loaded only when r_mask == 7. */
+    tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 7, l7);
+    get_offset(r_tmp2, cpu_T[0], -7);
+    tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+    tcg_gen_shli_tl(r_tmp2, r_tmp1, 56);
+    tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffffffffffffULL);
+    tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+    gen_set_label(l7);
+
+    tcg_temp_free(r_mask);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+    tcg_temp_free(r_tmp3);
+}
+
+void inline gen_sdl (DisasContext *ctx)
+{
+    TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+    int l3 = gen_new_label();
+    int l4 = gen_new_label();
+    int l5 = gen_new_label();
+    int l6 = gen_new_label();
+    int l7 = gen_new_label();
+    /* SDL: T1 >> 56 is always stored as a byte at address T0. */
+    get_lmask64(r_mask, cpu_T[0]);
+    tcg_gen_shri_tl(r_tmp1, cpu_T[1], 56);
+    tcg_gen_qemu_st8(r_tmp1, cpu_T[0], ctx->mem_idx);
+    /* For k = 1..6: T1 >> (56 - 8k) goes to get_offset(T0, k), */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 6, l1);
+    get_offset(r_tmp1, cpu_T[0], 1);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 48);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l1);
+    /* and the store is branched over when r_mask > 7 - k. */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 5, l2);
+    get_offset(r_tmp1, cpu_T[0], 2);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 40);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l2);
+
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 4, l3);
+    get_offset(r_tmp1, cpu_T[0], 3);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 32);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l3);
+
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 3, l4);
+    get_offset(r_tmp1, cpu_T[0], 4);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l4);
+    /* k = 5: the threshold must be 2, otherwise r_mask == 3 stores one byte too many. */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l5);
+    get_offset(r_tmp1, cpu_T[0], 5);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l5);
+    /* k = 6: the threshold must be 1. */
+    tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l6);
+    get_offset(r_tmp1, cpu_T[0], 6);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l6);
+    /* Unshifted T1 goes to get_offset(T0, 7); stored only when r_mask == 0. */
+    tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l7);
+    get_offset(r_tmp1, cpu_T[0], 7);
+    tcg_gen_qemu_st8(cpu_T[1], r_tmp1, ctx->mem_idx);
+    gen_set_label(l7);
+
+    tcg_temp_free(r_mask);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+}
+
+void inline gen_sdr (DisasContext *ctx)
+{
+    TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+    TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+    int l1 = gen_new_label();
+    int l2 = gen_new_label();
+    int l3 = gen_new_label();
+    int l4 = gen_new_label();
+    int l5 = gen_new_label();
+    int l6 = gen_new_label();
+    int l7 = gen_new_label();
+    /* SDR: unshifted T1 is always stored as a byte at address T0. */
+    get_lmask64(r_mask, cpu_T[0]);
+    tcg_gen_qemu_st8(cpu_T[1], cpu_T[0], ctx->mem_idx);
+    /* For k = 1..6: T1 >> 8k goes to get_offset(T0, -k), */
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+    get_offset(r_tmp1, cpu_T[0], -1);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l1);
+    /* and the store is branched over when r_mask < k. */
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+    get_offset(r_tmp1, cpu_T[0], -2);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l2);
+
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 3, l3);
+    get_offset(r_tmp1, cpu_T[0], -3);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l3);
+
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 4, l4);
+    get_offset(r_tmp1, cpu_T[0], -4);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 32);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l4);
+
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 5, l5);
+    get_offset(r_tmp1, cpu_T[0], -5);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 40);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l5);
+
+    tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 6, l6);
+    get_offset(r_tmp1, cpu_T[0], -6);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 48);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l6);
+    /* T1 >> 56 goes to get_offset(T0, -7); stored only when r_mask == 7. */
+    tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 7, l7);
+    get_offset(r_tmp1, cpu_T[0], -7);
+    tcg_gen_shri_tl(r_tmp2, cpu_T[1], 56);
+    tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+    gen_set_label(l7);
+
+    tcg_temp_free(r_mask);
+    tcg_temp_free(r_tmp1);
+    tcg_temp_free(r_tmp2);
+}
+#endif /* TARGET_MIPS64 */
+
 /* Load and store */
 static void gen_ldst (DisasContext *ctx, uint32_t opc, int rt,
                       int base, int16_t offset)
@@ -1080,24 +1529,24 @@
         break;
     case OPC_LDL:
         gen_load_gpr(cpu_T[1], rt);
-        op_ldst(ldl);
+        gen_ldl(ctx);
         gen_store_gpr(cpu_T[1], rt);
         opn = "ldl";
         break;
     case OPC_SDL:
         gen_load_gpr(cpu_T[1], rt);
-        op_ldst(sdl);
+        gen_sdl(ctx);
         opn = "sdl";
         break;
     case OPC_LDR:
         gen_load_gpr(cpu_T[1], rt);
-        op_ldst(ldr);
+        gen_ldr(ctx);
         gen_store_gpr(cpu_T[1], rt);
         opn = "ldr";
         break;
     case OPC_SDR:
         gen_load_gpr(cpu_T[1], rt);
-        op_ldst(sdr);
+        gen_sdr(ctx);
         opn = "sdr";
         break;
 #endif
@@ -1143,24 +1592,24 @@
         break;
     case OPC_LWL:
        gen_load_gpr(cpu_T[1], rt);
-        op_ldst(lwl);
+        gen_lwl(ctx);
         gen_store_gpr(cpu_T[1], rt);
         opn = "lwl";
         break;
     case OPC_SWL:
         gen_load_gpr(cpu_T[1], rt);
-        op_ldst(swl);
-        opn = "swr";
+        gen_swl(ctx);
+        opn = "swl";
         break;
     case OPC_LWR:
        gen_load_gpr(cpu_T[1], rt);
-        op_ldst(lwr);
+        gen_lwr(ctx);
         gen_store_gpr(cpu_T[1], rt);
         opn = "lwr";
         break;
     case OPC_SWR:
         gen_load_gpr(cpu_T[1], rt);
-        op_ldst(swr);
+        gen_swr(ctx);
         opn = "swr";
         break;
     case OPC_LL:
Index: qemu-work/exec-all.h
===================================================================
--- qemu-work.orig/exec-all.h   2008-06-17 06:47:25.000000000 +0100
+++ qemu-work/exec-all.h        2008-06-17 06:47:53.000000000 +0100
@@ -30,7 +30,7 @@
 struct TranslationBlock;
 
 /* XXX: make safe guess about sizes */
-#define MAX_OP_PER_INSTR 64
+#define MAX_OP_PER_INSTR 256
 /* A Call op needs up to 6 + 2N parameters (N = number of arguments).  */
 #define MAX_OPC_PARAM 10
 #define OPC_BUF_SIZE 512




reply via email to

[Prev in Thread] Current Thread [Next in Thread]