[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] TCG native 32->64 concatenation
From: Paul Brook
Subject: [Qemu-devel] TCG native 32->64 concatenation
Date: Sun, 7 Sep 2008 17:53:26 +0100
User-agent: KMail/1.9.9
The patch below adds a new concat_i32_i64 TCG op. This allows a pair of
32-bit values to be efficiently combined to form a 64-bit value. I've
converted all the cases I could find to use this, and tested the arm code on
both 32 and 64-bit hosts.
This touches bits of code that I can't easily test well, so I'd appreciate
another pair of eyes looking over it before I commit.
Signed-off-by: Paul Brook <address@hidden>
Index: target-sh4/translate.c
===================================================================
--- target-sh4/translate.c (revision 5178)
+++ target-sh4/translate.c (working copy)
@@ -393,15 +393,12 @@ static inline void gen_load_fpr32(TCGv t
static inline void gen_load_fpr64(TCGv t, int reg)
{
TCGv tmp1 = tcg_temp_new(TCG_TYPE_I32);
- TCGv tmp2 = tcg_temp_new(TCG_TYPE_I64);
+ TCGv tmp2 = tcg_temp_new(TCG_TYPE_I32);
tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg]));
- tcg_gen_extu_i32_i64(t, tmp1);
- tcg_gen_shli_i64(t, t, 32);
- tcg_gen_ld_i32(tmp1, cpu_env, offsetof(CPUState, fregs[reg + 1]));
- tcg_gen_extu_i32_i64(tmp2, tmp1);
+ tcg_gen_ld_i32(tmp2, cpu_env, offsetof(CPUState, fregs[reg + 1]));
+ tcg_gen_concat_i32_i64(t, tmp2, tmp1);
tcg_temp_free(tmp1);
- tcg_gen_or_i64(t, t, tmp2);
tcg_temp_free(tmp2);
}
Index: target-ppc/translate.c
===================================================================
--- target-ppc/translate.c (revision 5178)
+++ target-ppc/translate.c (working copy)
@@ -5308,12 +5308,7 @@ static always_inline void gen_load_gpr64
#if defined(TARGET_PPC64)
tcg_gen_mov_i64(t, cpu_gpr[reg]);
#else
- tcg_gen_extu_i32_i64(t, cpu_gprh[reg]);
- tcg_gen_shli_i64(t, t, 32);
- TCGv tmp = tcg_temp_local_new(TCG_TYPE_I64);
- tcg_gen_extu_i32_i64(tmp, cpu_gpr[reg]);
- tcg_gen_or_i64(t, t, tmp);
- tcg_temp_free(tmp);
+ tcg_gen_concat_i32_i64(t, cpu_gpr[reg], cpu_gprh[reg]);
#endif
}
Index: target-mips/translate.c
===================================================================
--- target-mips/translate.c (revision 5178)
+++ target-mips/translate.c (working copy)
@@ -666,14 +666,11 @@ static inline void gen_load_fpr64 (Disas
tcg_gen_ld_i64(t, current_fpu, 8 * reg);
} else {
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I32);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
+ TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I32);
tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg | 1) + 4 * FP_ENDIAN_IDX);
- tcg_gen_extu_i32_i64(t, r_tmp1);
- tcg_gen_shli_i64(t, t, 32);
- tcg_gen_ld_i32(r_tmp1, current_fpu, 8 * (reg & ~1) + 4 * FP_ENDIAN_IDX);
- tcg_gen_extu_i32_i64(r_tmp2, r_tmp1);
- tcg_gen_or_i64(t, t, r_tmp2);
+ tcg_gen_ld_i32(r_tmp2, current_fpu, 8 * (reg & ~1) + 4 * FP_ENDIAN_IDX);
+ tcg_gen_concat_i32_i64(t, r_tmp2, r_tmp1);
tcg_temp_free(r_tmp1);
tcg_temp_free(r_tmp2);
}
@@ -6531,22 +6528,17 @@ static void gen_farith (DisasContext *ct
case FOP(38, 16):
check_cp1_64bitmode(ctx);
{
- TCGv fp64_0 = tcg_temp_new(TCG_TYPE_I64);
- TCGv fp64_1 = tcg_temp_new(TCG_TYPE_I64);
+ TCGv fp64 = tcg_temp_new(TCG_TYPE_I64);
TCGv fp32_0 = tcg_temp_new(TCG_TYPE_I32);
TCGv fp32_1 = tcg_temp_new(TCG_TYPE_I32);
gen_load_fpr32(fp32_0, fs);
gen_load_fpr32(fp32_1, ft);
- tcg_gen_extu_i32_i64(fp64_0, fp32_0);
- tcg_gen_extu_i32_i64(fp64_1, fp32_1);
- tcg_temp_free(fp32_0);
+ tcg_gen_concat_i32_i64(fp64, fp32_0, fp32_1);
tcg_temp_free(fp32_1);
- tcg_gen_shli_i64(fp64_1, fp64_1, 32);
- tcg_gen_or_i64(fp64_0, fp64_0, fp64_1);
- tcg_temp_free(fp64_1);
- gen_store_fpr64(ctx, fp64_0, fd);
- tcg_temp_free(fp64_0);
+ tcg_temp_free(fp32_0);
+ gen_store_fpr64(ctx, fp64, fd);
+ tcg_temp_free(fp64);
}
opn = "cvt.ps.s";
break;
Index: tcg/tcg-op.h
===================================================================
--- tcg/tcg-op.h (revision 5178)
+++ tcg/tcg-op.h (working copy)
@@ -1395,6 +1395,23 @@ static inline void tcg_gen_discard_i64(T
}
#endif
+static inline void tcg_gen_concat_i32_i64(TCGv dest, TCGv low, TCGv high)
+{
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(dest, low);
+ tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+#else
+ TCGv tmp = tcg_temp_new (TCG_TYPE_I64);
+ /* This extension is only needed for type correctness.
+ We may be able to do better given target specific information. */
+ tcg_gen_extu_i32_i64(tmp, high);
+ tcg_gen_shli_i64(tmp, tmp, 32);
+ tcg_gen_extu_i32_i64(dest, low);
+ tcg_gen_or_i64(dest, dest, tmp);
+ tcg_temp_free(tmp);
+#endif
+}
+
/***************************************/
/* QEMU specific operations. Their type depend on the QEMU CPU type. */
Index: tcg/README
===================================================================
--- tcg/README (revision 5178)
+++ tcg/README (working copy)
@@ -265,6 +265,10 @@ Convert t1 (32 bit) to t0 (64 bit) and d
* trunc_i64_i32 t0, t1
Truncate t1 (64 bit) to t0 (32 bit)
+* concat_i32_i64 t0, t1, t2
+Construct t0 (64-bit) taking the low half from t1 (32 bit) and the high half
+from t2 (32 bit).
+
********* Load/Store
* ld_i32/i64 t0, t1, offset
Index: target-arm/translate.c
===================================================================
--- target-arm/translate.c (revision 5178)
+++ target-arm/translate.c (working copy)
@@ -1447,10 +1447,7 @@ static void gen_iwmmxt_movl_T0_T1_wRn(in
static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
{
- tcg_gen_extu_i32_i64(cpu_V0, cpu_T[0]);
- tcg_gen_extu_i32_i64(cpu_V1, cpu_T[0]);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
+ tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[0]);
iwmmxt_store_reg(cpu_V0, rn);
}
@@ -4663,14 +4660,11 @@ static int disas_neon_data_insn(CPUState
} else {
tmp = neon_load_reg(rm + pass, 0);
gen_neon_shift_narrow(size, tmp, tmp2, q, u);
- tcg_gen_extu_i32_i64(cpu_V0, tmp);
+ tmp3 = neon_load_reg(rm + pass, 1);
+ gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
+ tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
dead_tmp(tmp);
- tmp = neon_load_reg(rm + pass, 1);
- gen_neon_shift_narrow(size, tmp, tmp2, q, u);
- tcg_gen_extu_i32_i64(cpu_V1, tmp);
- dead_tmp(tmp);
- tcg_gen_shli_i64(cpu_V1, cpu_V1, 32);
- tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
+ dead_tmp(tmp3);
}
tmp = new_tmp();
if (op == 8 && !u) {
@@ -5600,7 +5594,7 @@ static void gen_addq_lo(DisasContext *s,
TCGv tmp;
TCGv tmp2;
- /* Load 64-bit value rd:rn. */
+ /* Load value and extend to 64 bits. */
tmp = tcg_temp_new(TCG_TYPE_I64);
tmp2 = load_reg(s, rlow);
tcg_gen_extu_i32_i64(tmp, tmp2);
@@ -5612,19 +5606,16 @@ static void gen_addq_lo(DisasContext *s,
static void gen_addq(DisasContext *s, TCGv val, int rlow, int rhigh)
{
TCGv tmp;
- TCGv tmp2;
+ TCGv tmpl;
+ TCGv tmph;
/* Load 64-bit value rd:rn. */
+ tmpl = load_reg(s, rlow);
+ tmph = load_reg(s, rhigh);
tmp = tcg_temp_new(TCG_TYPE_I64);
- tmp2 = load_reg(s, rhigh);
- tcg_gen_extu_i32_i64(tmp, tmp2);
- dead_tmp(tmp2);
- tcg_gen_shli_i64(tmp, tmp, 32);
- tcg_gen_add_i64(val, val, tmp);
-
- tmp2 = load_reg(s, rlow);
- tcg_gen_extu_i32_i64(tmp, tmp2);
- dead_tmp(tmp2);
+ tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
+ dead_tmp(tmpl);
+ dead_tmp(tmph);
tcg_gen_add_i64(val, val, tmp);
}
- [Qemu-devel] TCG native 32->64 concatenation,
Paul Brook <=