[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions |
Date: |
Tue, 18 Sep 2012 18:37:18 +0200 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Wed, Sep 12, 2012 at 10:01:50AM +0800, Jia Liu wrote:
> Add MIPS ASE DSP Bit/Manipulation instructions.
>
> Signed-off-by: Jia Liu <address@hidden>
> ---
> target-mips/dsp_helper.c | 75 +++++++++++++++
> target-mips/helper.h | 7 ++
> target-mips/translate.c | 229
> +++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 310 insertions(+), 1 deletion(-)
>
> diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
> index 5d0b1ed..8a89e8b 100644
> --- a/target-mips/dsp_helper.c
> +++ b/target-mips/dsp_helper.c
> @@ -5822,6 +5822,81 @@ void helper_dmsubu(CPUMIPSState *env,
> }
> #endif
>
> +/** DSP Bit/Manipulation Sub-class insns **/
> +target_ulong helper_bitrev(target_ulong rt)
> +{
> + int32_t temp;
> + uint32_t rd;
> + int i, last;
> +
> + temp = rt & MIPSDSP_LO;
> + rd = 0;
> + for (i = 0; i < 16; i++) {
> + last = temp % 2;
> + temp = temp >> 1;
> + rd = rd | (last << (15 - i));
> + }
> +
> + return (target_ulong)rd;
> +}
This looks overcomplicated, and I am not sure using a modulo is the
fastest way to get the last bit.
You can do something like:
| rd = 0;
| for (i = 0; i < 16; i++) {
| rd = (rd << 1) | temp & 1;
| temp = temp >> 1;
| }
> +target_ulong helper_insv(CPUMIPSState *env, target_ulong rs, target_ulong rt)
> +{
> + uint32_t pos, size, msb, lsb, filter;
> + uint32_t temp, temprs, temprt;
> + target_ulong dspc;
> +
> + dspc = env->active_tc.DSPControl;
> + pos = dspc & 0x1F;
> + size = (dspc >> 7) & 0x1F;
> + msb = pos + size - 1;
> + lsb = pos;
> +
> + if (lsb > msb) {
> + return rt;
> + }
> +
> + filter = ((int32_t)0x01 << size) - 1;
> + filter = filter << pos;
> + temprs = rs & filter;
> + temprt = rt & ~filter;
> + temp = temprs | temprt;
> +
> + return (target_long)(int32_t)temp;
> +}
> +
> +#if defined(TARGET_MIPS64)
> +target_ulong helper_dinsv(CPUMIPSState *env, target_ulong rs, target_ulong
> rt)
> +{
> + target_ulong dspctrl;
> + target_ulong filter;
> + uint8_t pos, size;
> + uint8_t msb, lsb;
> + uint64_t temp;
> +
> + temp = rt;
> + dspctrl = env->active_tc.DSPControl;
> + pos = dspctrl & 0x7F;
> + size = (dspctrl >> 7) & 0x3F;
> +
> + msb = pos + size - 1;
> + lsb = pos;
> +
> + if ((lsb > msb) || (msb > 63)) {
> + return temp;
> + }
> +
> + temp = 0;
> + filter = ((target_ulong)0x01 << size) - 1;
> + filter = filter << pos;
> +
> + temp |= rs & filter;
> + temp |= rt & (~filter);
> +
> + return temp;
> +}
> +#endif
> +
> #undef MIPSDSP_LHI
> #undef MIPSDSP_LLO
> #undef MIPSDSP_HI
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 5803fa5..e776fe9 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -576,4 +576,11 @@ DEF_HELPER_FLAGS_4(dmsub, 0, void, env, tl, tl, i32)
> DEF_HELPER_FLAGS_4(dmsubu, 0, void, env, tl, tl, i32)
> #endif
>
> +/* DSP Bit/Manipulation Sub-class insns */
> +DEF_HELPER_FLAGS_1(bitrev, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
> +DEF_HELPER_FLAGS_3(insv, 0, tl, env, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_3(dinsv, 0, tl, env, tl, tl);
> +#endif
> +
> #include "def-helper.h"
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 365228d..fb0af11 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -343,6 +343,11 @@ enum {
> #if defined(TARGET_MIPS64)
> OPC_DPAQ_W_QH_DSP = 0x34 | OPC_SPECIAL3,
> #endif
> + /* DSP Bit/Manipulation Sub-class */
> + OPC_INSV_DSP = 0x0C | OPC_SPECIAL3,
> +#if defined(TARGET_MIPS64)
> + OPC_DINSV_DSP = 0x0D | OPC_SPECIAL3,
> +#endif
> };
>
> /* BSHFL opcodes */
> @@ -450,6 +455,12 @@ enum {
> OPC_PRECEU_PH_QBR = (0x1D << 6) | OPC_ABSQ_S_PH_DSP,
> OPC_PRECEU_PH_QBLA = (0x1E << 6) | OPC_ABSQ_S_PH_DSP,
> OPC_PRECEU_PH_QBRA = (0x1F << 6) | OPC_ABSQ_S_PH_DSP,
> + /* DSP Bit/Manipulation Sub-class */
> + OPC_BITREV = (0x1B << 6) | OPC_ABSQ_S_PH_DSP,
> + OPC_REPL_QB = (0x02 << 6) | OPC_ABSQ_S_PH_DSP,
> + OPC_REPLV_QB = (0x03 << 6) | OPC_ABSQ_S_PH_DSP,
> + OPC_REPL_PH = (0x0A << 6) | OPC_ABSQ_S_PH_DSP,
> + OPC_REPLV_PH = (0x0B << 6) | OPC_ABSQ_S_PH_DSP,
> };
>
> #define MASK_CMPU_EQ_QB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> @@ -518,6 +529,12 @@ enum {
> OPC_MULSA_W_PH = (0x02 << 6) | OPC_DPA_W_PH_DSP,
> };
>
> +#define MASK_INSV(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> +enum {
> + /* DSP Bit/Manipulation Sub-class */
> + OPC_INSV = (0x00 << 6) | OPC_INSV_DSP,
> +};
> +
> #if defined(TARGET_MIPS64)
> #define MASK_ABSQ_S_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> enum {
> @@ -539,6 +556,13 @@ enum {
> OPC_ABSQ_S_OB = (0x01 << 6) | OPC_ABSQ_S_QH_DSP,
> OPC_ABSQ_S_PW = (0x11 << 6) | OPC_ABSQ_S_QH_DSP,
> OPC_ABSQ_S_QH = (0x09 << 6) | OPC_ABSQ_S_QH_DSP,
> + /* DSP Bit/Manipulation Sub-class */
> + OPC_REPL_OB = (0x02 << 6) | OPC_ABSQ_S_QH_DSP,
> + OPC_REPL_PW = (0x12 << 6) | OPC_ABSQ_S_QH_DSP,
> + OPC_REPL_QH = (0x0A << 6) | OPC_ABSQ_S_QH_DSP,
> + OPC_REPLV_OB = (0x03 << 6) | OPC_ABSQ_S_QH_DSP,
> + OPC_REPLV_PW = (0x13 << 6) | OPC_ABSQ_S_QH_DSP,
> + OPC_REPLV_QH = (0x0B << 6) | OPC_ABSQ_S_QH_DSP,
> };
> #endif
>
> @@ -592,6 +616,14 @@ enum {
> #endif
>
> #if defined(TARGET_MIPS64)
> +#define MASK_DINSV(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> +enum {
> + /* DSP Bit/Manipulation Sub-class */
> + OPC_DINSV = (0x00 << 6) | OPC_DINSV_DSP,
> +};
> +#endif
> +
> +#if defined(TARGET_MIPS64)
> #define MASK_DPAQ_W_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> enum {
> /* MIPS DSP Multiply Sub-class insns */
> @@ -12679,6 +12711,75 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx, int *is_branch)
> check_dsp(ctx);
> gen_helper_preceu_ph_qbra(cpu_gpr[rd], cpu_gpr[rt]);
> break;
> + case OPC_BITREV:
> + check_dsp(ctx);
> + gen_helper_bitrev(cpu_gpr[rd], cpu_gpr[rt]);
You should check for rd or rt being 0.
> + break;
> + case OPC_REPL_QB:
> + check_dsp(ctx);
> + {
> + target_long temp;
> +
> + imm = (ctx->opcode >> 16) & 0xFF;
> + temp = ((int32_t)imm << 24 | \
> + (int32_t)imm << 16 | \
> + (int32_t)imm << 8 | \
> + (int32_t)imm);
I don't think that casting to (int32_t) is doing what you want. You
don't need casts there, but rather a cast on the reassembled value to
sign-extend it.
> + tcg_gen_movi_tl(cpu_gpr[rd], temp);
> + break;
> + }
> + case OPC_REPLV_QB:
> + check_dsp(ctx);
> + {
> + TCGv t, temp_rd;
> +
> + t = tcg_temp_new();
> + temp_rd = tcg_temp_new();
> +
> + /* we need t to save gpr[rt] 7..0 bits. */
You actually don't need it; you can work directly on cpu_gpr[rd]
(provided that there is a check for the rd == 0 case), and use only one
temp for doing that.
> + tcg_gen_ext8u_tl(t, cpu_gpr[rt]);
> + tcg_gen_mov_tl(temp_rd, t);
> + tcg_gen_shli_tl(t, t, 8);
> + tcg_gen_or_tl(temp_rd, temp_rd, t);
> + tcg_gen_mov_tl(t, temp_rd);
> + tcg_gen_shli_tl(t, t, 16);
> + tcg_gen_or_tl(temp_rd, temp_rd, t);
> +#if defined(TARGET_MIPS64)
> + tcg_gen_ext32s_i64(temp_rd, temp_rd);
> +#endif
> + tcg_gen_mov_tl(cpu_gpr[rd], temp_rd);
> +
> + tcg_temp_free(t);
> + tcg_temp_free(temp_rd);
> + break;
> + }
> + case OPC_REPL_PH:
> + check_dsp(ctx);
> + {
> + imm = (ctx->opcode >> 16) & 0x03FF;
> + tcg_gen_movi_tl(cpu_gpr[rd], \
> + (target_long)((int32_t)imm << 16 | \
> + (uint32_t)(uint16_t)imm));
> + break;
> + }
> + case OPC_REPLV_PH:
> + check_dsp(ctx);
> + {
> + TCGv t, temp_rd;
> +
> + t = tcg_temp_new();
> + temp_rd = tcg_temp_new();
> +
> + tcg_gen_ext16u_tl(t, cpu_gpr[rt]);
> + tcg_gen_ext16s_tl(temp_rd, cpu_gpr[rt]);
> + tcg_gen_shli_tl(temp_rd, temp_rd, 16);
> + tcg_gen_or_tl(temp_rd, temp_rd, t);
> + tcg_gen_mov_tl(cpu_gpr[rd], temp_rd);
> +
There you can also work directly on cpu_gpr[rd]:
| tcg_gen_ext16u_tl(t, cpu_gpr[rt]);
| tcg_gen_ext16s_tl(cpu_gpr[rd], cpu_gpr[rt]);
| tcg_gen_shli_tl(cpu_gpr[rd], cpu_gpr[rd], 16);
| tcg_gen_or_tl(cpu_gpr[rd], cpu_gpr[rd], t);
> + tcg_temp_free(t);
> + tcg_temp_free(temp_rd);
> + break;
> + }
> default: /* Invalid */
> MIPS_INVAL("MASK ABSQ_S.PH");
> generate_exception(ctx, EXCP_RI);
> @@ -13179,6 +13280,22 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx, int *is_branch)
> break;
> }
> break;
> + case OPC_INSV_DSP:
> + op2 = MASK_INSV(ctx->opcode);
> + switch (op2) {
> + case OPC_INSV:
> + check_dsp(ctx);
> + {
> + gen_helper_insv(cpu_gpr[rt], cpu_env,
> + cpu_gpr[rs], cpu_gpr[rt]);
> + break;
> + }
> + default: /* Invalid */
> + MIPS_INVAL("MASK INSV");
> + generate_exception(ctx, EXCP_RI);
> + break;
> + }
> + break;
> #if defined(TARGET_MIPS64)
> case OPC_DEXTM ... OPC_DEXT:
> case OPC_DINSM ... OPC_DINS:
> @@ -13260,6 +13377,100 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx, int *is_branch)
> check_dsp(ctx);
> gen_helper_preceu_qh_obra(cpu_gpr[rd], cpu_gpr[rt]);
> break;
> + case OPC_REPL_OB:
> + check_dsp(ctx);
> + {
> + target_long temp;
> +
> + imm = (ctx->opcode >> 16) & 0xFF;
> + temp = imm;
> + temp = (temp << 8) | temp;
> + temp = (temp << 16) | temp;
> + temp = (temp << 32) | temp;
> + tcg_gen_movi_tl(cpu_gpr[rd], temp);
> + break;
> + }
> + case OPC_REPL_PW:
> + check_dsp(ctx);
> + {
> + target_long temp;
> +
> + imm = (ctx->opcode >> 16) & 0x03FF;
> + imm = (int16_t)(imm << 6) >> 6;
> + temp = ((target_long)imm << 32) \
> + | ((target_long)imm & 0xFFFFFFFF);
> + tcg_gen_movi_tl(cpu_gpr[rd], temp);
> + break;
> + }
> + case OPC_REPL_QH:
> + check_dsp(ctx);
> + {
> + target_long temp;
> +
> + imm = (ctx->opcode >> 16) & 0x03FF;
> + imm = (int16_t)(imm << 6) >> 6;
> +
> + temp = ((uint64_t)(uint16_t)imm << 48) | \
> + ((uint64_t)(uint16_t)imm << 32) | \
> + ((uint64_t)(uint16_t)imm << 16) | \
> + (uint64_t)(uint16_t)imm;
> + tcg_gen_movi_tl(cpu_gpr[rd], temp);
> + break;
> + }
> + case OPC_REPLV_OB:
> + check_dsp(ctx);
> + {
> + TCGv immv, temp_rd;
> +
> + immv = tcg_const_tl(0);
> + temp_rd = tcg_const_tl(0);
> +
> + tcg_gen_ext8u_tl(immv, cpu_gpr[rt]);
> + tcg_gen_mov_tl(temp_rd, immv);
> + tcg_gen_shli_tl(temp_rd, temp_rd, 8);
> + tcg_gen_or_tl(temp_rd, temp_rd, immv);
> + tcg_gen_mov_tl(immv, temp_rd);
> + tcg_gen_shli_tl(temp_rd, temp_rd, 16);
> + tcg_gen_or_tl(temp_rd, temp_rd, immv);
> + tcg_gen_concat_tl_i64(temp_rd, temp_rd, temp_rd);
I am not sure concat is the best way there, as it does a sign extension
first, which is unneeded. Also, as with replv.qb, it can be done with only
one temp and without the mov.
> +
> + gen_store_gpr(temp_rd, rd);
> +
> + tcg_temp_free(immv);
> + tcg_temp_free(temp_rd);
> + break;
> + }
> + case OPC_REPLV_PW:
> + check_insn(env, ctx, ASE_DSP);
> + {
> + TCGv imm_v;
> + imm_v = tcg_temp_new();
> +
> + tcg_gen_ext32u_i64(imm_v, cpu_gpr[rt]);
> + tcg_gen_concat_tl_i64(cpu_gpr[rd], imm_v, imm_v);
concat already does the zero extension.
> +
> + tcg_temp_free(imm_v);
> + break;
> + }
> + case OPC_REPLV_QH:
> + check_insn(env, ctx, ASE_DSP);
> + {
> + TCGv imm_v;
> + TCGv temp_rd;
> +
> + imm_v = tcg_temp_new();
> + temp_rd = tcg_temp_new();
> +
> + tcg_gen_ext16u_tl(imm_v, cpu_gpr[rt]);
> + tcg_gen_mov_tl(temp_rd, imm_v);
> + tcg_gen_shli_tl(temp_rd, temp_rd, 16);
> + tcg_gen_or_tl(temp_rd, temp_rd, imm_v);
> + tcg_gen_concat_tl_i64(cpu_gpr[rd], temp_rd, temp_rd);
> +
Same comments as for previous instructions.
> + tcg_temp_free(imm_v);
> + tcg_temp_free(temp_rd);
> + break;
> + }
> case OPC_ABSQ_S_OB:
> check_dspr2(ctx);
> gen_helper_absq_s_ob(cpu_gpr[rd], cpu_env, cpu_gpr[rt]);
> @@ -13621,6 +13832,22 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx, int *is_branch)
> }
> #endif
> #if defined(TARGET_MIPS64)
> + case OPC_DINSV_DSP:
> + op2 = MASK_INSV(ctx->opcode);
> + switch (op2) {
> + case OPC_DINSV:
> + check_dsp(ctx);
> + gen_helper_dinsv(cpu_gpr[rt], cpu_env,
> + cpu_gpr[rs], cpu_gpr[rt]);
> + break;
> + default: /* Invalid */
> + MIPS_INVAL("MASK DINSV");
> + generate_exception(ctx, EXCP_RI);
> + break;
> + }
> + break;
> +#endif
> +#if defined(TARGET_MIPS64)
> case OPC_SHLL_OB_DSP:
> op2 = MASK_SHLL_OB(ctx->opcode);
> switch (op2) {
> @@ -13795,7 +14022,7 @@ static void decode_opc (CPUMIPSState *env,
> DisasContext *ctx, int *is_branch)
> generate_exception(ctx, EXCP_RI);
> break;
> }
> - break;
> + break;
> #endif
> default: /* Invalid */
> MIPS_INVAL("special3");
> --
> 1.7.9.5
>
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net
- Re: [Qemu-devel] [PATCH v8 03/14] target-mips-ase-dsp: Use correct acc value to index cpu_HI/cpu_LO rather than using a fix number, (continued)
- [Qemu-devel] [PATCH v8 04/14] target-mips-ase-dsp: Add branch instructions, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 05/14] target-mips-ase-dsp: Add load instructions, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 07/14] target-mips-ase-dsp: Add GPR-based shift instructions, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 08/14] target-mips-ase-dsp: Add multiply instructions, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions, Jia Liu, 2012/09/11
- Re: [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions,
Aurelien Jarno <=
- [Qemu-devel] [PATCH v8 10/14] target-mips-ase-dsp: Add compare-pick instructions, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 11/14] target-mips-ase-dsp: Add DSP accumulator instructions, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 14/14] target-mips-ase-dsp: Change TODO file, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 13/14] target-mips-ase-dsp: Add testcases, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 06/14] target-mips-ase-dsp: Add arithmetic instructions, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 12/14] target-mips-ase-dsp: Add MIPS DSP processors, Jia Liu, 2012/09/11
- Re: [Qemu-devel] [PATCH v8 00/14] QEMU MIPS ASE DSP support, Jia Liu, 2012/09/17