[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 007/126] target-s390: Use TCG registers for FPR
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH 007/126] target-s390: Use TCG registers for FPR |
Date: |
Mon, 10 Sep 2012 16:34:22 +0200 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Sun, Sep 09, 2012 at 02:04:25PM -0700, Richard Henderson wrote:
> At the same time, tidy other usages of tcg_gen_deposit_i64.
> In some cases we can "type cast" rather than extend, and in
> others we can allow tcg_gen_deposit_i64 itself to optimize
> the HOST_LONG_BITS==32 case.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> target-s390x/translate.c | 68
> ++++++++++++++++++++++++++++--------------------
> 1 file changed, 40 insertions(+), 28 deletions(-)
>
> diff --git a/target-s390x/translate.c b/target-s390x/translate.c
> index 3080cef..bf35a65 100644
> --- a/target-s390x/translate.c
> +++ b/target-s390x/translate.c
> @@ -89,7 +89,7 @@ void cpu_dump_state(CPUS390XState *env, FILE *f,
> fprintf_function cpu_fprintf,
> }
>
> for (i = 0; i < 16; i++) {
> - cpu_fprintf(f, "F%02d=%016" PRIx64, i, *(uint64_t *)&env->fregs[i]);
> + cpu_fprintf(f, "F%02d=%016" PRIx64, i, env->fregs[i].ll);
> if ((i % 4) == 3) {
> cpu_fprintf(f, "\n");
> } else {
> @@ -136,21 +136,22 @@ static TCGv_i64 cc_src;
> static TCGv_i64 cc_dst;
> static TCGv_i64 cc_vr;
>
> -static char cpu_reg_names[10*3 + 6*4];
> +static char cpu_reg_names[32][4];
> static TCGv_i64 regs[16];
> +static TCGv_i64 fregs[16];
>
> static uint8_t gen_opc_cc_op[OPC_BUF_SIZE];
>
> void s390x_translate_init(void)
> {
> int i;
> - size_t cpu_reg_names_size = sizeof(cpu_reg_names);
> - char *p;
>
> cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
> - psw_addr = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState,
> psw.addr),
> + psw_addr = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUS390XState, psw.addr),
> "psw_addr");
> - psw_mask = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState,
> psw.mask),
> + psw_mask = tcg_global_mem_new_i64(TCG_AREG0,
> + offsetof(CPUS390XState, psw.mask),
> "psw_mask");
>
> cc_op = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUS390XState, cc_op),
> @@ -162,13 +163,18 @@ void s390x_translate_init(void)
> cc_vr = tcg_global_mem_new_i64(TCG_AREG0, offsetof(CPUS390XState, cc_vr),
> "cc_vr");
>
> - p = cpu_reg_names;
> for (i = 0; i < 16; i++) {
> - snprintf(p, cpu_reg_names_size, "r%d", i);
> + snprintf(cpu_reg_names[i], sizeof(cpu_reg_names[0]), "r%d", i);
> regs[i] = tcg_global_mem_new(TCG_AREG0,
> - offsetof(CPUS390XState, regs[i]), p);
> - p += (i < 10) ? 3 : 4;
> - cpu_reg_names_size -= (i < 10) ? 3 : 4;
> + offsetof(CPUS390XState, regs[i]),
> + cpu_reg_names[i]);
> + }
> +
> + for (i = 0; i < 16; i++) {
> + snprintf(cpu_reg_names[i + 16], sizeof(cpu_reg_names[0]), "f%d", i);
> + fregs[i] = tcg_global_mem_new(TCG_AREG0,
> + offsetof(CPUS390XState, fregs[i].d),
> + cpu_reg_names[i + 16]);
> }
> }
>
> @@ -182,14 +188,18 @@ static inline TCGv_i64 load_reg(int reg)
> static inline TCGv_i64 load_freg(int reg)
> {
> TCGv_i64 r = tcg_temp_new_i64();
> - tcg_gen_ld_i64(r, cpu_env, offsetof(CPUS390XState, fregs[reg].d));
> + tcg_gen_mov_i64(r, fregs[reg]);
> return r;
> }
>
> static inline TCGv_i32 load_freg32(int reg)
> {
> TCGv_i32 r = tcg_temp_new_i32();
> - tcg_gen_ld_i32(r, cpu_env, offsetof(CPUS390XState, fregs[reg].l.upper));
> +#if HOST_LONG_BITS == 32
> + tcg_gen_mov_i32(r, TCGV_HIGH(fregs[reg]));
> +#else
> + tcg_gen_shri_i64(MAKE_TCGV_I64(GET_TCGV_I32(r)), fregs[reg], 32);
> +#endif
> return r;
> }
>
> @@ -214,39 +224,35 @@ static inline void store_reg(int reg, TCGv_i64 v)
>
> static inline void store_freg(int reg, TCGv_i64 v)
> {
> - tcg_gen_st_i64(v, cpu_env, offsetof(CPUS390XState, fregs[reg].d));
> + tcg_gen_mov_i64(fregs[reg], v);
> }
>
> static inline void store_reg32(int reg, TCGv_i32 v)
> {
> + /* 32 bit register writes keep the upper half */
> #if HOST_LONG_BITS == 32
> tcg_gen_mov_i32(TCGV_LOW(regs[reg]), v);
> #else
> - TCGv_i64 tmp = tcg_temp_new_i64();
> - tcg_gen_extu_i32_i64(tmp, v);
> - /* 32 bit register writes keep the upper half */
> - tcg_gen_deposit_i64(regs[reg], regs[reg], tmp, 0, 32);
> - tcg_temp_free_i64(tmp);
> + tcg_gen_deposit_i64(regs[reg], regs[reg],
> + MAKE_TCGV_I64(GET_TCGV_I32(v)), 0, 32);
> #endif
> }
>
> static inline void store_reg32_i64(int reg, TCGv_i64 v)
> {
> /* 32 bit register writes keep the upper half */
> -#if HOST_LONG_BITS == 32
> - tcg_gen_mov_i32(TCGV_LOW(regs[reg]), TCGV_LOW(v));
> -#else
> tcg_gen_deposit_i64(regs[reg], regs[reg], v, 0, 32);
> -#endif
> }
>
> static inline void store_reg16(int reg, TCGv_i32 v)
> {
> - TCGv_i64 tmp = tcg_temp_new_i64();
> - tcg_gen_extu_i32_i64(tmp, v);
> /* 16 bit register writes keep the upper bytes */
> - tcg_gen_deposit_i64(regs[reg], regs[reg], tmp, 0, 16);
> - tcg_temp_free_i64(tmp);
> +#if HOST_LONG_BITS == 32
> + tcg_gen_deposit_i32(TCGV_LOW(regs[reg]), TCGV_LOW(regs[reg]), v, 0, 16);
> +#else
> + tcg_gen_deposit_i64(regs[reg], regs[reg],
> + MAKE_TCGV_I64(GET_TCGV_I32(v)), 0, 16);
> +#endif
> }
>
> static inline void store_reg8(int reg, TCGv_i64 v)
> @@ -257,7 +263,13 @@ static inline void store_reg8(int reg, TCGv_i64 v)
>
> static inline void store_freg32(int reg, TCGv_i32 v)
> {
> - tcg_gen_st_i32(v, cpu_env, offsetof(CPUS390XState, fregs[reg].l.upper));
> + /* 32 bit register writes keep the lower half */
> +#if HOST_LONG_BITS == 32
> + tcg_gen_mov_i32(TCGV_HIGH(fregs[reg]), v);
> +#else
> + tcg_gen_deposit_i64(fregs[reg], fregs[reg],
> + MAKE_TCGV_I64(GET_TCGV_I32(v)), 32, 32);
> +#endif
> }
>
I am not sure we want to start mixing different layers, and especially
having some assumptions about the host inside the target. That's why I
don't think we should have things like HOST_LONG_BITS in target-*/ and
even less MAKE_TCGV_I64() and GET_TCGV_I32().
Your code take the assumption that it's possible to do moves between 32
and 64-bit registers, which might not be guaranteed on some hosts. It's
fine taking such assumptions (as long as there is also a comment) in the
TCG code.
To handle conversion between 32- and 64-bit registers we have functions
like:
- concat_i32_i64
- extu_i32_i64
- ext_i32_i64
- trunc_i64_i32
These functions work on both 32- and 64-bit hosts, accessing directly
the high and low part on 32-bit hosts.
If it is not possible to implement your FPR code using these functions,
we might want to add some more, but I really thing it's a bad idea to
have this code in the targets.
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net
- [Qemu-devel] [PATCH 000/126] Rewrite s390x translator, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 001/126] tcg: Add TCGV_IS_UNUSED_*, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 006/126] target-s390: Add missing temp_free in gen_op_calc_cc, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 002/126] tcg: Add TCG_COND_NEVER, TCG_COND_ALWAYS, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 004/126] target-s390: Fix disassembly of cpsdr, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 005/126] target-s390: Fix gdbstub, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 003/126] target-s390: Disassemble more z10 and z196 opcodes, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 007/126] target-s390: Use TCG registers for FPR, Richard Henderson, 2012/09/09
- Re: [Qemu-devel] [PATCH 007/126] target-s390: Use TCG registers for FPR,
Aurelien Jarno <=
- [Qemu-devel] [PATCH 009/126] target-s390: Split o ut disas_jcc, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 011/126] target-s390: Convert ADD HALFWORD, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 012/126] target-s390: Implement SUBTRACT HALFWORD, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 013/126] target-s390: Implement ADD LOGICAL WITH SIGNED IMMEDIATE, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 015/126] target-s390: Convert AND, OR, XOR, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 018/126] target-s390: Convert LOAD ADDRESS, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 017/126] target-s390: Convert LOAD, LOAD LOGICAL, Richard Henderson, 2012/09/09
- [Qemu-devel] [PATCH 014/126] target-s390: Convert MULTIPLY, Richard Henderson, 2012/09/09