[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions
From: |
Thiemo Seufer |
Subject: |
[Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions |
Date: |
Tue, 17 Jun 2008 13:12:51 +0100 |
User-agent: |
Mutt/1.5.18 (2008-05-17) |
Hello All,
I am currently trying to implement TCG versions of the MIPS [ls][dw][lr]
set of instructions. I believe I can't use a helper function for
load/store type instructions. The appended patch uses TCG directly, but
the resulting translation is excessively complicated. Is there a better
way to do this?
Thiemo
Index: qemu-work/target-mips/op.c
===================================================================
--- qemu-work.orig/target-mips/op.c 2008-06-16 07:32:12.000000000 +0100
+++ qemu-work/target-mips/op.c 2008-06-17 06:43:36.000000000 +0100
@@ -30,41 +30,6 @@
#ifndef CALL_FROM_TB1
#define CALL_FROM_TB1(func, arg0) func(arg0)
#endif
-#ifndef CALL_FROM_TB1_CONST16
-#define CALL_FROM_TB1_CONST16(func, arg0) CALL_FROM_TB1(func, arg0)
-#endif
-#ifndef CALL_FROM_TB2
-#define CALL_FROM_TB2(func, arg0, arg1) func(arg0, arg1)
-#endif
-#ifndef CALL_FROM_TB2_CONST16
-#define CALL_FROM_TB2_CONST16(func, arg0, arg1) \
- CALL_FROM_TB2(func, arg0, arg1)
-#endif
-#ifndef CALL_FROM_TB3
-#define CALL_FROM_TB3(func, arg0, arg1, arg2) func(arg0, arg1, arg2)
-#endif
-#ifndef CALL_FROM_TB4
-#define CALL_FROM_TB4(func, arg0, arg1, arg2, arg3) \
- func(arg0, arg1, arg2, arg3)
-#endif
-
-/* Load and store */
-#define MEMSUFFIX _raw
-#include "op_mem.c"
-#undef MEMSUFFIX
-#if !defined(CONFIG_USER_ONLY)
-#define MEMSUFFIX _user
-#include "op_mem.c"
-#undef MEMSUFFIX
-
-#define MEMSUFFIX _super
-#include "op_mem.c"
-#undef MEMSUFFIX
-
-#define MEMSUFFIX _kernel
-#include "op_mem.c"
-#undef MEMSUFFIX
-#endif
/* 64 bits arithmetic */
#if TARGET_LONG_BITS > HOST_LONG_BITS
Index: qemu-work/target-mips/op_mem.c
===================================================================
--- qemu-work.orig/target-mips/op_mem.c 2008-06-16 07:32:12.000000000 +0100
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,269 +0,0 @@
-/*
- * MIPS emulation memory micro-operations for qemu.
- *
- * Copyright (c) 2004-2005 Jocelyn Mayer
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-/* "half" load and stores. We must do the memory access inline,
- or fault handling won't work. */
-
-#ifdef TARGET_WORDS_BIGENDIAN
-#define GET_LMASK(v) ((v) & 3)
-#define GET_OFFSET(addr, offset) (addr + (offset))
-#else
-#define GET_LMASK(v) (((v) & 3) ^ 3)
-#define GET_OFFSET(addr, offset) (addr - (offset))
-#endif
-
-void glue(op_lwl, MEMSUFFIX) (void)
-{
- target_ulong tmp;
-
- tmp = glue(ldub, MEMSUFFIX)(T0);
- T1 = (T1 & 0x00FFFFFF) | (tmp << 24);
-
- if (GET_LMASK(T0) <= 2) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 1));
- T1 = (T1 & 0xFF00FFFF) | (tmp << 16);
- }
-
- if (GET_LMASK(T0) <= 1) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 2));
- T1 = (T1 & 0xFFFF00FF) | (tmp << 8);
- }
-
- if (GET_LMASK(T0) == 0) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 3));
- T1 = (T1 & 0xFFFFFF00) | tmp;
- }
- T1 = (int32_t)T1;
- FORCE_RET();
-}
-
-void glue(op_lwr, MEMSUFFIX) (void)
-{
- target_ulong tmp;
-
- tmp = glue(ldub, MEMSUFFIX)(T0);
- T1 = (T1 & 0xFFFFFF00) | tmp;
-
- if (GET_LMASK(T0) >= 1) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -1));
- T1 = (T1 & 0xFFFF00FF) | (tmp << 8);
- }
-
- if (GET_LMASK(T0) >= 2) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -2));
- T1 = (T1 & 0xFF00FFFF) | (tmp << 16);
- }
-
- if (GET_LMASK(T0) == 3) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -3));
- T1 = (T1 & 0x00FFFFFF) | (tmp << 24);
- }
- T1 = (int32_t)T1;
- FORCE_RET();
-}
-
-void glue(op_swl, MEMSUFFIX) (void)
-{
- glue(stb, MEMSUFFIX)(T0, (uint8_t)(T1 >> 24));
-
- if (GET_LMASK(T0) <= 2)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 1), (uint8_t)(T1 >> 16));
-
- if (GET_LMASK(T0) <= 1)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 2), (uint8_t)(T1 >> 8));
-
- if (GET_LMASK(T0) == 0)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 3), (uint8_t)T1);
-
- FORCE_RET();
-}
-
-void glue(op_swr, MEMSUFFIX) (void)
-{
- glue(stb, MEMSUFFIX)(T0, (uint8_t)T1);
-
- if (GET_LMASK(T0) >= 1)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -1), (uint8_t)(T1 >> 8));
-
- if (GET_LMASK(T0) >= 2)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -2), (uint8_t)(T1 >> 16));
-
- if (GET_LMASK(T0) == 3)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -3), (uint8_t)(T1 >> 24));
-
- FORCE_RET();
-}
-
-#if defined(TARGET_MIPS64)
-/* "half" load and stores. We must do the memory access inline,
- or fault handling won't work. */
-
-#ifdef TARGET_WORDS_BIGENDIAN
-#define GET_LMASK64(v) ((v) & 7)
-#else
-#define GET_LMASK64(v) (((v) & 7) ^ 7)
-#endif
-
-void glue(op_ldl, MEMSUFFIX) (void)
-{
- uint64_t tmp;
-
- tmp = glue(ldub, MEMSUFFIX)(T0);
- T1 = (T1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
-
- if (GET_LMASK64(T0) <= 6) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 1));
- T1 = (T1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
- }
-
- if (GET_LMASK64(T0) <= 5) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 2));
- T1 = (T1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
- }
-
- if (GET_LMASK64(T0) <= 4) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 3));
- T1 = (T1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
- }
-
- if (GET_LMASK64(T0) <= 3) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 4));
- T1 = (T1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
- }
-
- if (GET_LMASK64(T0) <= 2) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 5));
- T1 = (T1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
- }
-
- if (GET_LMASK64(T0) <= 1) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 6));
- T1 = (T1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
- }
-
- if (GET_LMASK64(T0) == 0) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, 7));
- T1 = (T1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
- }
-
- FORCE_RET();
-}
-
-void glue(op_ldr, MEMSUFFIX) (void)
-{
- uint64_t tmp;
-
- tmp = glue(ldub, MEMSUFFIX)(T0);
- T1 = (T1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
-
- if (GET_LMASK64(T0) >= 1) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -1));
- T1 = (T1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
- }
-
- if (GET_LMASK64(T0) >= 2) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -2));
- T1 = (T1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
- }
-
- if (GET_LMASK64(T0) >= 3) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -3));
- T1 = (T1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
- }
-
- if (GET_LMASK64(T0) >= 4) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -4));
- T1 = (T1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
- }
-
- if (GET_LMASK64(T0) >= 5) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -5));
- T1 = (T1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
- }
-
- if (GET_LMASK64(T0) >= 6) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -6));
- T1 = (T1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
- }
-
- if (GET_LMASK64(T0) == 7) {
- tmp = glue(ldub, MEMSUFFIX)(GET_OFFSET(T0, -7));
- T1 = (T1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
- }
-
- FORCE_RET();
-}
-
-void glue(op_sdl, MEMSUFFIX) (void)
-{
- glue(stb, MEMSUFFIX)(T0, (uint8_t)(T1 >> 56));
-
- if (GET_LMASK64(T0) <= 6)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 1), (uint8_t)(T1 >> 48));
-
- if (GET_LMASK64(T0) <= 5)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 2), (uint8_t)(T1 >> 40));
-
- if (GET_LMASK64(T0) <= 4)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 3), (uint8_t)(T1 >> 32));
-
- if (GET_LMASK64(T0) <= 3)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 4), (uint8_t)(T1 >> 24));
-
- if (GET_LMASK64(T0) <= 2)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 5), (uint8_t)(T1 >> 16));
-
- if (GET_LMASK64(T0) <= 1)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 6), (uint8_t)(T1 >> 8));
-
- if (GET_LMASK64(T0) <= 0)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, 7), (uint8_t)T1);
-
- FORCE_RET();
-}
-
-void glue(op_sdr, MEMSUFFIX) (void)
-{
- glue(stb, MEMSUFFIX)(T0, (uint8_t)T1);
-
- if (GET_LMASK64(T0) >= 1)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -1), (uint8_t)(T1 >> 8));
-
- if (GET_LMASK64(T0) >= 2)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -2), (uint8_t)(T1 >> 16));
-
- if (GET_LMASK64(T0) >= 3)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -3), (uint8_t)(T1 >> 24));
-
- if (GET_LMASK64(T0) >= 4)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -4), (uint8_t)(T1 >> 32));
-
- if (GET_LMASK64(T0) >= 5)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -5), (uint8_t)(T1 >> 40));
-
- if (GET_LMASK64(T0) >= 6)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -6), (uint8_t)(T1 >> 48));
-
- if (GET_LMASK64(T0) == 7)
- glue(stb, MEMSUFFIX)(GET_OFFSET(T0, -7), (uint8_t)(T1 >> 56));
-
- FORCE_RET();
-}
-#endif /* TARGET_MIPS64 */
Index: qemu-work/target-mips/translate.c
===================================================================
--- qemu-work.orig/target-mips/translate.c 2008-06-17 06:43:05.000000000 +0100
+++ qemu-work/target-mips/translate.c 2008-06-17 06:43:36.000000000 +0100
@@ -930,37 +930,6 @@
}
/* load/store instructions. */
-#if defined(CONFIG_USER_ONLY)
-#define op_ldst(name) gen_op_##name##_raw()
-#define OP_LD_TABLE(width)
-#define OP_ST_TABLE(width)
-#else
-#define op_ldst(name) (*gen_op_##name[ctx->mem_idx])()
-#define OP_LD_TABLE(width) \
-static GenOpFunc *gen_op_l##width[] = { \
- &gen_op_l##width##_kernel, \
- &gen_op_l##width##_super, \
- &gen_op_l##width##_user, \
-}
-#define OP_ST_TABLE(width) \
-static GenOpFunc *gen_op_s##width[] = { \
- &gen_op_s##width##_kernel, \
- &gen_op_s##width##_super, \
- &gen_op_s##width##_user, \
-}
-#endif
-
-#if defined(TARGET_MIPS64)
-OP_LD_TABLE(dl);
-OP_LD_TABLE(dr);
-OP_ST_TABLE(dl);
-OP_ST_TABLE(dr);
-#endif
-OP_LD_TABLE(wl);
-OP_LD_TABLE(wr);
-OP_ST_TABLE(wl);
-OP_ST_TABLE(wr);
-
#define OP_LD(insn,fname) \
void inline op_ldst_##insn(DisasContext *ctx) \
{ \
@@ -1032,6 +1001,486 @@
#endif
#undef OP_ST_ATOMIC
+/* "half" load and stores. We must do the memory access inline,
+ or fault handling won't work. */
+void inline get_lmask (TCGv ret, TCGv val)
+{
+ tcg_gen_andi_tl(ret, val, 3);
+#ifndef TARGET_WORDS_BIGENDIAN
+ tcg_gen_xori_tl(ret, ret, 3);
+#endif
+}
+
+void inline get_offset (TCGv ret, TCGv val, target_ulong off)
+{
+#ifdef TARGET_WORDS_BIGENDIAN
+ tcg_gen_addi_tl(ret, val, off);
+#else
+ tcg_gen_subi_tl(ret, val, off);
+#endif
+}
+
+void inline gen_lwl (DisasContext *ctx)
+{
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+
+ get_lmask(r_mask, cpu_T[0]);
+ tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l1);
+ get_offset(r_tmp2, cpu_T[0], 1);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l1);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l2);
+ get_offset(r_tmp2, cpu_T[0], 2);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l2);
+
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l3);
+ get_offset(r_tmp2, cpu_T[0], 3);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+ gen_set_label(l3);
+
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+ tcg_temp_free(r_tmp3);
+ tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+}
+
+void inline gen_lwr (DisasContext *ctx)
+{
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+
+ get_lmask(r_mask, cpu_T[0]);
+ tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+ get_offset(r_tmp2, cpu_T[0], -1);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l1);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+ get_offset(r_tmp2, cpu_T[0], -2);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l2);
+
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 3, l3);
+ get_offset(r_tmp2, cpu_T[0], -3);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffff);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l3);
+
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+ tcg_temp_free(r_tmp3);
+ tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+}
+
+void inline gen_swl (DisasContext *ctx)
+{
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+
+ get_lmask(r_mask, cpu_T[0]);
+ tcg_gen_shri_tl(r_tmp1, cpu_T[1], 24);
+ tcg_gen_qemu_st8(r_tmp1, cpu_T[0], ctx->mem_idx);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l1);
+ get_offset(r_tmp1, cpu_T[0], 1);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l1);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l2);
+ get_offset(r_tmp1, cpu_T[0], 2);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l2);
+
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l3);
+ get_offset(r_tmp1, cpu_T[0], 3);
+ tcg_gen_qemu_st8(cpu_T[1], r_tmp1, ctx->mem_idx);
+ gen_set_label(l3);
+
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+}
+
+void inline gen_swr (DisasContext *ctx)
+{
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+
+ get_lmask(r_mask, cpu_T[0]);
+ tcg_gen_qemu_st8(cpu_T[1], cpu_T[0], ctx->mem_idx);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+ get_offset(r_tmp1, cpu_T[0], -1);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l1);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+ get_offset(r_tmp1, cpu_T[0], -2);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l2);
+
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 3, l3);
+ get_offset(r_tmp1, cpu_T[0], -3);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l3);
+
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+}
+
+#if defined(TARGET_MIPS64)
+
+void inline get_lmask64 (TCGv ret, TCGv val)
+{
+ tcg_gen_andi_tl(ret, val, 7);
+#ifndef TARGET_WORDS_BIGENDIAN
+ tcg_gen_xori_tl(ret, ret, 7);
+#endif
+}
+
+void inline gen_ldl (DisasContext *ctx)
+{
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ int l4 = gen_new_label();
+ int l5 = gen_new_label();
+ int l6 = gen_new_label();
+ int l7 = gen_new_label();
+
+ get_lmask64(r_mask, cpu_T[0]);
+ tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 56);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 6, l1);
+ get_offset(r_tmp2, cpu_T[0], 1);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 48);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l1);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 5, l2);
+ get_offset(r_tmp2, cpu_T[0], 2);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 40);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l2);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 4, l3);
+ get_offset(r_tmp2, cpu_T[0], 3);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 32);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00ffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l3);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 3, l4);
+ get_offset(r_tmp2, cpu_T[0], 4);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffff00ffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l4);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l5);
+ get_offset(r_tmp2, cpu_T[0], 5);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffff00ffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l5);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l6);
+ get_offset(r_tmp2, cpu_T[0], 6);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffff00ffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l6);
+
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l7);
+ get_offset(r_tmp2, cpu_T[0], 7);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffffff00ULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp1);
+ gen_set_label(l7);
+
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+ tcg_temp_free(r_tmp3);
+}
+
+void inline gen_ldr (DisasContext *ctx)
+{
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ int l4 = gen_new_label();
+ int l5 = gen_new_label();
+ int l6 = gen_new_label();
+ int l7 = gen_new_label();
+
+ get_lmask64(r_mask, cpu_T[0]);
+ tcg_gen_qemu_ld8u(r_tmp1, cpu_T[0], ctx->mem_idx);
+ tcg_gen_andi_tl(r_tmp2, cpu_T[1], 0xffffffffffffff00ULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp2, r_tmp1);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+ get_offset(r_tmp2, cpu_T[0], -1);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 8);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffffff00ffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l1);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+ get_offset(r_tmp2, cpu_T[0], -2);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 16);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffffff00ffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l2);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 3, l3);
+ get_offset(r_tmp2, cpu_T[0], -3);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 24);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffffff00ffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l3);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 4, l4);
+ get_offset(r_tmp2, cpu_T[0], -4);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 32);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffffff00ffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l4);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 5, l5);
+ get_offset(r_tmp2, cpu_T[0], -5);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 40);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xffff00ffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l5);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 6, l6);
+ get_offset(r_tmp2, cpu_T[0], -6);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 48);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0xff00ffffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l6);
+
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 7, l7);
+ get_offset(r_tmp2, cpu_T[0], -7);
+ tcg_gen_qemu_ld8u(r_tmp1, r_tmp2, ctx->mem_idx);
+ tcg_gen_shli_tl(r_tmp2, r_tmp1, 56);
+ tcg_gen_andi_tl(r_tmp3, cpu_T[1], 0x00ffffffffffffffULL);
+ tcg_gen_or_tl(cpu_T[1], r_tmp3, r_tmp2);
+ gen_set_label(l7);
+
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+ tcg_temp_free(r_tmp3);
+}
+
+void inline gen_sdl (DisasContext *ctx)
+{
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ int l4 = gen_new_label();
+ int l5 = gen_new_label();
+ int l6 = gen_new_label();
+ int l7 = gen_new_label();
+ /* SDL: store the high-order bytes of cpu_T[1] starting at address cpu_T[0]; the byte at offset k (1..7) is stored only when lmask <= 7 - k, mirroring the removed op_sdl. */
+ get_lmask64(r_mask, cpu_T[0]);
+ tcg_gen_shri_tl(r_tmp1, cpu_T[1], 56);
+ tcg_gen_qemu_st8(r_tmp1, cpu_T[0], ctx->mem_idx); /* byte at offset 0 is stored unconditionally */
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 6, l1); /* store only if lmask <= 6 */
+ get_offset(r_tmp1, cpu_T[0], 1);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 48);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l1);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 5, l2); /* store only if lmask <= 5 */
+ get_offset(r_tmp1, cpu_T[0], 2);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 40);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l2);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 4, l3); /* store only if lmask <= 4 */
+ get_offset(r_tmp1, cpu_T[0], 3);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 32);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l3);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 3, l4); /* store only if lmask <= 3 */
+ get_offset(r_tmp1, cpu_T[0], 4);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l4);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 2, l5); /* store only if lmask <= 2 (was 3: off by one) */
+ get_offset(r_tmp1, cpu_T[0], 5);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l5);
+
+ tcg_gen_brcondi_tl(TCG_COND_GT, r_mask, 1, l6); /* store only if lmask <= 1 (was 2: off by one) */
+ get_offset(r_tmp1, cpu_T[0], 6);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l6);
+
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 0, l7); /* store only if lmask == 0 (was GT 1: off by one) */
+ get_offset(r_tmp1, cpu_T[0], 7);
+ tcg_gen_qemu_st8(cpu_T[1], r_tmp1, ctx->mem_idx);
+ gen_set_label(l7);
+
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+}
+
+void inline gen_sdr (DisasContext *ctx)
+{
+ TCGv r_mask = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_TL);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_TL);
+ int l1 = gen_new_label();
+ int l2 = gen_new_label();
+ int l3 = gen_new_label();
+ int l4 = gen_new_label();
+ int l5 = gen_new_label();
+ int l6 = gen_new_label();
+ int l7 = gen_new_label();
+
+ get_lmask64(r_mask, cpu_T[0]);
+ tcg_gen_qemu_st8(cpu_T[1], cpu_T[0], ctx->mem_idx);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 1, l1);
+ get_offset(r_tmp1, cpu_T[0], -1);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 8);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l1);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 2, l2);
+ get_offset(r_tmp1, cpu_T[0], -2);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 16);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l2);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 3, l3);
+ get_offset(r_tmp1, cpu_T[0], -3);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 24);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l3);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 4, l4);
+ get_offset(r_tmp1, cpu_T[0], -4);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 32);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l4);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 5, l5);
+ get_offset(r_tmp1, cpu_T[0], -5);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 40);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l5);
+
+ tcg_gen_brcondi_tl(TCG_COND_LT, r_mask, 6, l6);
+ get_offset(r_tmp1, cpu_T[0], -6);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 48);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l6);
+
+ tcg_gen_brcondi_tl(TCG_COND_NE, r_mask, 7, l7);
+ get_offset(r_tmp1, cpu_T[0], -7);
+ tcg_gen_shri_tl(r_tmp2, cpu_T[1], 56);
+ tcg_gen_qemu_st8(r_tmp2, r_tmp1, ctx->mem_idx);
+ gen_set_label(l7);
+
+ tcg_temp_free(r_mask);
+ tcg_temp_free(r_tmp1);
+ tcg_temp_free(r_tmp2);
+}
+#endif /* TARGET_MIPS64 */
+
/* Load and store */
static void gen_ldst (DisasContext *ctx, uint32_t opc, int rt,
int base, int16_t offset)
@@ -1080,24 +1529,24 @@
break;
case OPC_LDL:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(ldl);
+ gen_ldl(ctx);
gen_store_gpr(cpu_T[1], rt);
opn = "ldl";
break;
case OPC_SDL:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(sdl);
+ gen_sdl(ctx);
opn = "sdl";
break;
case OPC_LDR:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(ldr);
+ gen_ldr(ctx);
gen_store_gpr(cpu_T[1], rt);
opn = "ldr";
break;
case OPC_SDR:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(sdr);
+ gen_sdr(ctx);
opn = "sdr";
break;
#endif
@@ -1143,24 +1592,24 @@
break;
case OPC_LWL:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(lwl);
+ gen_lwl(ctx);
gen_store_gpr(cpu_T[1], rt);
opn = "lwl";
break;
case OPC_SWL:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(swl);
- opn = "swr";
+ gen_swl(ctx);
+ opn = "swl";
break;
case OPC_LWR:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(lwr);
+ gen_lwr(ctx);
gen_store_gpr(cpu_T[1], rt);
opn = "lwr";
break;
case OPC_SWR:
gen_load_gpr(cpu_T[1], rt);
- op_ldst(swr);
+ gen_swr(ctx);
opn = "swr";
break;
case OPC_LL:
Index: qemu-work/exec-all.h
===================================================================
--- qemu-work.orig/exec-all.h 2008-06-17 06:47:25.000000000 +0100
+++ qemu-work/exec-all.h 2008-06-17 06:47:53.000000000 +0100
@@ -30,7 +30,7 @@
struct TranslationBlock;
/* XXX: make safe guess about sizes */
-#define MAX_OP_PER_INSTR 64
+#define MAX_OP_PER_INSTR 256
/* A Call op needs up to 6 + 2N parameters (N = number of arguments). */
#define MAX_OPC_PARAM 10
#define OPC_BUF_SIZE 512
- [Qemu-devel] Wanted: A better way to implement MIPS unaligned instructions,
Thiemo Seufer <=