Re: [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions

From:	Aurelien Jarno
Subject:	Re: [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions
Date:	Tue, 18 Sep 2012 18:37:18 +0200
User-agent:	Mutt/1.5.21 (2010-09-15)

On Wed, Sep 12, 2012 at 10:01:50AM +0800, Jia Liu wrote:
> Add MIPS ASE DSP Bit/Manipulation instructions.
> 
> Signed-off-by: Jia Liu <address@hidden>
> ---
>  target-mips/dsp_helper.c |   75 +++++++++++++++
>  target-mips/helper.h     |    7 ++
>  target-mips/translate.c  |  229 
> +++++++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 310 insertions(+), 1 deletion(-)
> 
> diff --git a/target-mips/dsp_helper.c b/target-mips/dsp_helper.c
> index 5d0b1ed..8a89e8b 100644
> --- a/target-mips/dsp_helper.c
> +++ b/target-mips/dsp_helper.c
> @@ -5822,6 +5822,81 @@ void helper_dmsubu(CPUMIPSState *env,
>  }
>  #endif
>  
> +/** DSP Bit/Manipulation Sub-class insns **/
> +target_ulong helper_bitrev(target_ulong rt)
> +{
> +    int32_t temp;
> +    uint32_t rd;
> +    int i, last;
> +
> +    temp = rt & MIPSDSP_LO;
> +    rd = 0;
> +    for (i = 0; i < 16; i++) {
> +        last = temp % 2;
> +        temp = temp >> 1;
> +        rd = rd | (last << (15 - i));
> +    }
> +
> +    return (target_ulong)rd;
> +}

This looks overcomplicated, and I am not sure using a modulo is the
fastest way to get the last bit.

You can do something like:

|    rd = 0;
|    for (i = 0; i < 16; i++) {
|        rd = (rd << 1) | temp & 1;
|        temp = temp >> 1;
|    }

> +target_ulong helper_insv(CPUMIPSState *env, target_ulong rs, target_ulong rt)
> +{
> +    uint32_t pos, size, msb, lsb, filter;
> +    uint32_t temp, temprs, temprt;
> +    target_ulong dspc;
> +
> +    dspc = env->active_tc.DSPControl;
> +    pos  = dspc & 0x1F;
> +    size = (dspc >> 7) & 0x1F;
> +    msb  = pos + size - 1;
> +    lsb  = pos;
> +
> +    if (lsb > msb) {
> +        return rt;
> +    }
> +
> +    filter = ((int32_t)0x01 << size) - 1;
> +    filter = filter << pos;
> +    temprs = rs & filter;
> +    temprt = rt & ~filter;
> +    temp = temprs | temprt;
> +
> +    return (target_long)(int32_t)temp;
> +}
> +
> +#if defined(TARGET_MIPS64)
> +target_ulong helper_dinsv(CPUMIPSState *env, target_ulong rs, target_ulong 
> rt)
> +{
> +    target_ulong dspctrl;
> +    target_ulong filter;
> +    uint8_t pos, size;
> +    uint8_t msb, lsb;
> +    uint64_t temp;
> +
> +    temp = rt;
> +    dspctrl = env->active_tc.DSPControl;
> +    pos = dspctrl & 0x7F;
> +    size = (dspctrl >> 7) & 0x3F;
> +
> +    msb = pos + size - 1;
> +    lsb = pos;
> +
> +    if ((lsb > msb) || (msb > 63)) {
> +        return temp;
> +    }
> +
> +    temp = 0;
> +    filter = ((target_ulong)0x01 << size) - 1;
> +    filter = filter << pos;
> +
> +    temp |= rs & filter;
> +    temp |= rt & (~filter);
> +
> +    return temp;
> +}
> +#endif
> +
>  #undef MIPSDSP_LHI
>  #undef MIPSDSP_LLO
>  #undef MIPSDSP_HI
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 5803fa5..e776fe9 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -576,4 +576,11 @@ DEF_HELPER_FLAGS_4(dmsub, 0, void, env, tl, tl, i32)
>  DEF_HELPER_FLAGS_4(dmsubu, 0, void, env, tl, tl, i32)
>  #endif
>  
> +/* DSP Bit/Manipulation Sub-class insns */
> +DEF_HELPER_FLAGS_1(bitrev, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
> +DEF_HELPER_FLAGS_3(insv, 0, tl, env, tl, tl)
> +#if defined(TARGET_MIPS64)
> +DEF_HELPER_FLAGS_3(dinsv, 0, tl, env, tl, tl);
> +#endif
> +
>  #include "def-helper.h"
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 365228d..fb0af11 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -343,6 +343,11 @@ enum {
>  #if defined(TARGET_MIPS64)
>      OPC_DPAQ_W_QH_DSP  = 0x34 | OPC_SPECIAL3,
>  #endif
> +    /* DSP Bit/Manipulation Sub-class */
> +    OPC_INSV_DSP       = 0x0C | OPC_SPECIAL3,
> +#if defined(TARGET_MIPS64)
> +    OPC_DINSV_DSP      = 0x0D | OPC_SPECIAL3,
> +#endif
>  };
>  
>  /* BSHFL opcodes */
> @@ -450,6 +455,12 @@ enum {
>      OPC_PRECEU_PH_QBR   = (0x1D << 6) | OPC_ABSQ_S_PH_DSP,
>      OPC_PRECEU_PH_QBLA  = (0x1E << 6) | OPC_ABSQ_S_PH_DSP,
>      OPC_PRECEU_PH_QBRA  = (0x1F << 6) | OPC_ABSQ_S_PH_DSP,
> +    /* DSP Bit/Manipulation Sub-class */
> +    OPC_BITREV          = (0x1B << 6) | OPC_ABSQ_S_PH_DSP,
> +    OPC_REPL_QB         = (0x02 << 6) | OPC_ABSQ_S_PH_DSP,
> +    OPC_REPLV_QB        = (0x03 << 6) | OPC_ABSQ_S_PH_DSP,
> +    OPC_REPL_PH         = (0x0A << 6) | OPC_ABSQ_S_PH_DSP,
> +    OPC_REPLV_PH        = (0x0B << 6) | OPC_ABSQ_S_PH_DSP,
>  };
>  
>  #define MASK_CMPU_EQ_QB(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> @@ -518,6 +529,12 @@ enum {
>      OPC_MULSA_W_PH    = (0x02 << 6) | OPC_DPA_W_PH_DSP,
>  };
>  
> +#define MASK_INSV(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> +enum {
> +    /* DSP Bit/Manipulation Sub-class */
> +    OPC_INSV = (0x00 << 6) | OPC_INSV_DSP,
> +};
> +
>  #if defined(TARGET_MIPS64)
>  #define MASK_ABSQ_S_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>  enum {
> @@ -539,6 +556,13 @@ enum {
>      OPC_ABSQ_S_OB       = (0x01 << 6) | OPC_ABSQ_S_QH_DSP,
>      OPC_ABSQ_S_PW       = (0x11 << 6) | OPC_ABSQ_S_QH_DSP,
>      OPC_ABSQ_S_QH       = (0x09 << 6) | OPC_ABSQ_S_QH_DSP,
> +    /* DSP Bit/Manipulation Sub-class */
> +    OPC_REPL_OB         = (0x02 << 6) | OPC_ABSQ_S_QH_DSP,
> +    OPC_REPL_PW         = (0x12 << 6) | OPC_ABSQ_S_QH_DSP,
> +    OPC_REPL_QH         = (0x0A << 6) | OPC_ABSQ_S_QH_DSP,
> +    OPC_REPLV_OB        = (0x03 << 6) | OPC_ABSQ_S_QH_DSP,
> +    OPC_REPLV_PW        = (0x13 << 6) | OPC_ABSQ_S_QH_DSP,
> +    OPC_REPLV_QH        = (0x0B << 6) | OPC_ABSQ_S_QH_DSP,
>  };
>  #endif
>  
> @@ -592,6 +616,14 @@ enum {
>  #endif
>  
>  #if defined(TARGET_MIPS64)
> +#define MASK_DINSV(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
> +enum {
> +    /* DSP Bit/Manipulation Sub-class */
> +    OPC_DINSV = (0x00 << 6) | OPC_DINSV_DSP,
> +};
> +#endif
> +
> +#if defined(TARGET_MIPS64)
>  #define MASK_DPAQ_W_QH(op) (MASK_SPECIAL3(op) | (op & (0x1F << 6)))
>  enum {
>      /* MIPS DSP Multiply Sub-class insns */
> @@ -12679,6 +12711,75 @@ static void decode_opc (CPUMIPSState *env, 
> DisasContext *ctx, int *is_branch)
>                  check_dsp(ctx);
>                  gen_helper_preceu_ph_qbra(cpu_gpr[rd], cpu_gpr[rt]);
>                  break;
> +            case OPC_BITREV:
> +                check_dsp(ctx);
> +                gen_helper_bitrev(cpu_gpr[rd], cpu_gpr[rt]);

You should check for rd or rt being 0.

> +                break;
> +            case OPC_REPL_QB:
> +                check_dsp(ctx);
> +                {
> +                    target_long temp;
> +
> +                    imm = (ctx->opcode >> 16) & 0xFF;
> +                    temp = ((int32_t)imm << 24 | \
> +                            (int32_t)imm << 16 | \
> +                            (int32_t)imm << 8  | \
> +                            (int32_t)imm);

I don't think that casting to (int32_t) is doing what you want. You
don't need casts there, but you do need a cast on the reassembled value
to sign-extend it.

> +                    tcg_gen_movi_tl(cpu_gpr[rd], temp);
> +                    break;
> +                }
> +            case OPC_REPLV_QB:
> +                check_dsp(ctx);
> +                {
> +                    TCGv t, temp_rd;
> +
> +                    t = tcg_temp_new();
> +                    temp_rd = tcg_temp_new();
> +
> +                    /* we need t to save gpr[rt] 7..0 bits. */

You actually don't need it: you can work directly on cpu_gpr[rd]
(provided that there is a check for the rd == 0 case), and use only one
temp for doing that.

> +                    tcg_gen_ext8u_tl(t, cpu_gpr[rt]);
> +                    tcg_gen_mov_tl(temp_rd, t);
> +                    tcg_gen_shli_tl(t, t, 8);
> +                    tcg_gen_or_tl(temp_rd, temp_rd, t);
> +                    tcg_gen_mov_tl(t, temp_rd);
> +                    tcg_gen_shli_tl(t, t, 16);
> +                    tcg_gen_or_tl(temp_rd, temp_rd, t);
> +#if defined(TARGET_MIPS64)
> +                    tcg_gen_ext32s_i64(temp_rd, temp_rd);
> +#endif
> +                    tcg_gen_mov_tl(cpu_gpr[rd], temp_rd);
> +
> +                    tcg_temp_free(t);
> +                    tcg_temp_free(temp_rd);
> +                    break;
> +                }
> +            case OPC_REPL_PH:
> +                check_dsp(ctx);
> +                {
> +                    imm = (ctx->opcode >> 16) & 0x03FF;
> +                    tcg_gen_movi_tl(cpu_gpr[rd], \
> +                                    (target_long)((int32_t)imm << 16 | \
> +                                    (uint32_t)(uint16_t)imm));
> +                    break;
> +                }
> +            case OPC_REPLV_PH:
> +                check_dsp(ctx);
> +                {
> +                    TCGv t, temp_rd;
> +
> +                    t = tcg_temp_new();
> +                    temp_rd = tcg_temp_new();
> +
> +                    tcg_gen_ext16u_tl(t, cpu_gpr[rt]);
> +                    tcg_gen_ext16s_tl(temp_rd, cpu_gpr[rt]);
> +                    tcg_gen_shli_tl(temp_rd, temp_rd, 16);
> +                    tcg_gen_or_tl(temp_rd, temp_rd, t);
> +                    tcg_gen_mov_tl(cpu_gpr[rd], temp_rd);
> +

There you can also work directly on cpu_gpr[rd]:

| tcg_gen_ext16u_tl(t, cpu_gpr[rt]);
| tcg_gen_ext16s_tl(cpu_gpr[rd], cpu_gpr[rt]);
| tcg_gen_shli_tl(cpu_gpr[rd], cpu_gpr[rd], 16);
| tcg_gen_or_tl(cpu_gpr[rd], cpu_gpr[rd], t);

> +                    tcg_temp_free(t);
> +                    tcg_temp_free(temp_rd);
> +                    break;
> +                }
>              default:            /* Invalid */
>                  MIPS_INVAL("MASK ABSQ_S.PH");
>                  generate_exception(ctx, EXCP_RI);
> @@ -13179,6 +13280,22 @@ static void decode_opc (CPUMIPSState *env, 
> DisasContext *ctx, int *is_branch)
>                  break;
>              }
>              break;
> +        case OPC_INSV_DSP:
> +            op2 = MASK_INSV(ctx->opcode);
> +            switch (op2) {
> +            case OPC_INSV:
> +                check_dsp(ctx);
> +                {
> +                    gen_helper_insv(cpu_gpr[rt], cpu_env,
> +                                    cpu_gpr[rs], cpu_gpr[rt]);
> +                    break;
> +                }
> +            default:            /* Invalid */
> +                MIPS_INVAL("MASK INSV");
> +                generate_exception(ctx, EXCP_RI);
> +                break;
> +            }
> +            break;
>  #if defined(TARGET_MIPS64)
>          case OPC_DEXTM ... OPC_DEXT:
>          case OPC_DINSM ... OPC_DINS:
> @@ -13260,6 +13377,100 @@ static void decode_opc (CPUMIPSState *env, 
> DisasContext *ctx, int *is_branch)
>                  check_dsp(ctx);
>                  gen_helper_preceu_qh_obra(cpu_gpr[rd], cpu_gpr[rt]);
>                  break;
> +            case OPC_REPL_OB:
> +                check_dsp(ctx);
> +                {
> +                    target_long temp;
> +
> +                    imm = (ctx->opcode >> 16) & 0xFF;
> +                    temp = imm;
> +                    temp = (temp << 8) | temp;
> +                    temp = (temp << 16) | temp;
> +                    temp = (temp << 32) | temp;
> +                    tcg_gen_movi_tl(cpu_gpr[rd], temp);
> +                    break;
> +                }
> +            case OPC_REPL_PW:
> +                check_dsp(ctx);
> +                {
> +                    target_long temp;
> +
> +                    imm = (ctx->opcode >> 16) & 0x03FF;
> +                    imm = (int16_t)(imm << 6) >> 6;
> +                    temp = ((target_long)imm << 32) \
> +                           | ((target_long)imm & 0xFFFFFFFF);
> +                    tcg_gen_movi_tl(cpu_gpr[rd], temp);
> +                    break;
> +                }
> +            case OPC_REPL_QH:
> +                check_dsp(ctx);
> +                {
> +                    target_long temp;
> +
> +                    imm = (ctx->opcode >> 16) & 0x03FF;
> +                    imm = (int16_t)(imm << 6) >> 6;
> +
> +                    temp = ((uint64_t)(uint16_t)imm << 48) | \
> +                           ((uint64_t)(uint16_t)imm << 32) | \
> +                           ((uint64_t)(uint16_t)imm << 16) | \
> +                           (uint64_t)(uint16_t)imm;
> +                    tcg_gen_movi_tl(cpu_gpr[rd], temp);
> +                    break;
> +                }
> +            case OPC_REPLV_OB:
> +                check_dsp(ctx);
> +                {
> +                    TCGv immv, temp_rd;
> +
> +                    immv = tcg_const_tl(0);
> +                    temp_rd = tcg_const_tl(0);
> +
> +                    tcg_gen_ext8u_tl(immv, cpu_gpr[rt]);
> +                    tcg_gen_mov_tl(temp_rd, immv);
> +                    tcg_gen_shli_tl(temp_rd, temp_rd, 8);
> +                    tcg_gen_or_tl(temp_rd, temp_rd, immv);
> +                    tcg_gen_mov_tl(immv, temp_rd);
> +                    tcg_gen_shli_tl(temp_rd, temp_rd, 16);
> +                    tcg_gen_or_tl(temp_rd, temp_rd, immv);
> +                    tcg_gen_concat_tl_i64(temp_rd, temp_rd, temp_rd);

I am not sure concat is the best way there, as it does a sign extension
first, which is unneeded. Also as for replv.qb, it can be done with only
one temp and without the mov.

> +
> +                    gen_store_gpr(temp_rd, rd);
> +
> +                    tcg_temp_free(immv);
> +                    tcg_temp_free(temp_rd);
> +                    break;
> +                }
> +            case OPC_REPLV_PW:
> +                check_insn(env, ctx, ASE_DSP);
> +                {
> +                    TCGv imm_v;
> +                    imm_v = tcg_temp_new();
> +
> +                    tcg_gen_ext32u_i64(imm_v, cpu_gpr[rt]);
> +                    tcg_gen_concat_tl_i64(cpu_gpr[rd], imm_v, imm_v);

concat already does the zero extension.

> +
> +                    tcg_temp_free(imm_v);
> +                    break;
> +                }
> +            case OPC_REPLV_QH:
> +                check_insn(env, ctx, ASE_DSP);
> +                {
> +                    TCGv imm_v;
> +                    TCGv temp_rd;
> +
> +                    imm_v = tcg_temp_new();
> +                    temp_rd = tcg_temp_new();
> +
> +                    tcg_gen_ext16u_tl(imm_v, cpu_gpr[rt]);
> +                    tcg_gen_mov_tl(temp_rd, imm_v);
> +                    tcg_gen_shli_tl(temp_rd, temp_rd, 16);
> +                    tcg_gen_or_tl(temp_rd, temp_rd, imm_v);
> +                    tcg_gen_concat_tl_i64(cpu_gpr[rd], temp_rd, temp_rd);
> +

Same comments as for previous instructions.

> +                    tcg_temp_free(imm_v);
> +                    tcg_temp_free(temp_rd);
> +                    break;
> +                }
>              case OPC_ABSQ_S_OB:
>                  check_dspr2(ctx);
>                  gen_helper_absq_s_ob(cpu_gpr[rd], cpu_env, cpu_gpr[rt]);
> @@ -13621,6 +13832,22 @@ static void decode_opc (CPUMIPSState *env, 
> DisasContext *ctx, int *is_branch)
>              }
>  #endif
>  #if defined(TARGET_MIPS64)
> +        case OPC_DINSV_DSP:
> +            op2 = MASK_INSV(ctx->opcode);
> +            switch (op2) {
> +            case OPC_DINSV:
> +                check_dsp(ctx);
> +                gen_helper_dinsv(cpu_gpr[rt], cpu_env,
> +                                 cpu_gpr[rs], cpu_gpr[rt]);
> +                break;
> +            default:            /* Invalid */
> +                MIPS_INVAL("MASK DINSV");
> +                generate_exception(ctx, EXCP_RI);
> +                break;
> +            }
> +            break;
> +#endif
> +#if defined(TARGET_MIPS64)
>          case OPC_SHLL_OB_DSP:
>              op2 = MASK_SHLL_OB(ctx->opcode);
>              switch (op2) {
> @@ -13795,7 +14022,7 @@ static void decode_opc (CPUMIPSState *env, 
> DisasContext *ctx, int *is_branch)
>                  generate_exception(ctx, EXCP_RI);
>                  break;
>              }
> -          break;
> +            break;
>  #endif
>          default:            /* Invalid */
>              MIPS_INVAL("special3");
> -- 
> 1.7.9.5
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
address@hidden                 http://www.aurel32.net

[Prev in Thread]

Current Thread

[Next in Thread]

Re: [Qemu-devel] [PATCH v8 03/14] target-mips-ase-dsp: Use correct acc value to index cpu_HI/cpu_LO rather than using a fix number, (continued)
- [Qemu-devel] [PATCH v8 04/14] target-mips-ase-dsp: Add branch instructions, Jia Liu, 2012/09/11
  - Re: [Qemu-devel] [PATCH v8 04/14] target-mips-ase-dsp: Add branch instructions, Aurelien Jarno, 2012/09/18
- [Qemu-devel] [PATCH v8 05/14] target-mips-ase-dsp: Add load instructions, Jia Liu, 2012/09/11
  - Re: [Qemu-devel] [PATCH v8 05/14] target-mips-ase-dsp: Add load instructions, Aurelien Jarno, 2012/09/18
- [Qemu-devel] [PATCH v8 07/14] target-mips-ase-dsp: Add GPR-based shift instructions, Jia Liu, 2012/09/11
  - Re: [Qemu-devel] [PATCH v8 07/14] target-mips-ase-dsp: Add GPR-based shift instructions, Aurelien Jarno, 2012/09/18
- [Qemu-devel] [PATCH v8 08/14] target-mips-ase-dsp: Add multiply instructions, Jia Liu, 2012/09/11
  - Re: [Qemu-devel] [PATCH v8 08/14] target-mips-ase-dsp: Add multiply instructions, Aurelien Jarno, 2012/09/18
- [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions, Jia Liu, 2012/09/11
  - Re: [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions, Aurelien Jarno <=
- [Qemu-devel] [PATCH v8 10/14] target-mips-ase-dsp: Add compare-pick instructions, Jia Liu, 2012/09/11
  - Re: [Qemu-devel] [PATCH v8 10/14] target-mips-ase-dsp: Add compare-pick instructions, Aurelien Jarno, 2012/09/18
- [Qemu-devel] [PATCH v8 11/14] target-mips-ase-dsp: Add DSP accumulator instructions, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 14/14] target-mips-ase-dsp: Change TODO file, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 13/14] target-mips-ase-dsp: Add testcases, Jia Liu, 2012/09/11
- [Qemu-devel] [PATCH v8 06/14] target-mips-ase-dsp: Add arithmetic instructions, Jia Liu, 2012/09/11
  - Re: [Qemu-devel] [PATCH v8 06/14] target-mips-ase-dsp: Add arithmetic instructions, Aurelien Jarno, 2012/09/18
- [Qemu-devel] [PATCH v8 12/14] target-mips-ase-dsp: Add MIPS DSP processors, Jia Liu, 2012/09/11
  - Re: [Qemu-devel] [PATCH v8 12/14] target-mips-ase-dsp: Add MIPS DSP processors, Aurelien Jarno, 2012/09/18
- Re: [Qemu-devel] [PATCH v8 00/14] QEMU MIPS ASE DSP support, Jia Liu, 2012/09/17

Prev by Date: Re: [Qemu-devel] [PATCH v8 06/14] target-mips-ase-dsp: Add arithmetic instructions
Next by Date: Re: [Qemu-devel] [PATCH v8 12/14] target-mips-ase-dsp: Add MIPS DSP processors
Previous by thread: [Qemu-devel] [PATCH v8 09/14] target-mips-ase-dsp: Add bit/manipulation instructions
Next by thread: [Qemu-devel] [PATCH v8 10/14] target-mips-ase-dsp: Add compare-pick instructions
Index(es):
- Date
- Thread