qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v2 11/20] target-mips: add MSA I5 format instruc


From: James Hogan
Subject: Re: [Qemu-devel] [PATCH v2 11/20] target-mips: add MSA I5 format instruction
Date: Wed, 29 Oct 2014 23:23:43 +0000
User-agent: Mutt/1.5.22 (2013-10-16)

Hi Yongbok,

On Wed, Oct 29, 2014 at 01:41:59AM +0000, Yongbok Kim wrote:
> +DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_clei_u_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_clti_s_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_clti_u_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32)
> +DEF_HELPER_5(msa_maxi_s_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_maxi_u_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64)

s64 seems like overkill for a 5bit field.

> diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
> index 46ffaa5..ffdde07 100644
> --- a/target-mips/msa_helper.c
> +++ b/target-mips/msa_helper.c
> @@ -114,3 +114,145 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, 
> uint32_t wd,
>      msa_move_v(pwd, pwx);
>  }
>  
> +static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    return arg1 + arg2;
> +}
> +
> +static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    return arg1 - arg2;
> +}
> +
> +static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    return arg1 == arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    return arg1 <= arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    uint64_t u_arg1 = UNSIGNED(arg1, df);
> +    uint64_t u_arg2 = UNSIGNED(arg2, df);
> +    return u_arg1 <= u_arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    return arg1 < arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    uint64_t u_arg1 = UNSIGNED(arg1, df);
> +    uint64_t u_arg2 = UNSIGNED(arg2, df);
> +    return u_arg1 < u_arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    return arg1 > arg2 ? arg1 : arg2;
> +}
> +
> +static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    uint64_t u_arg1 = UNSIGNED(arg1, df);
> +    uint64_t u_arg2 = UNSIGNED(arg2, df);
> +    return u_arg1 > u_arg2 ? arg1 : arg2;
> +}
> +
> +static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    return arg1 < arg2 ? arg1 : arg2;
> +}
> +
> +static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> +    uint64_t u_arg1 = UNSIGNED(arg1, df);
> +    uint64_t u_arg2 = UNSIGNED(arg2, df);
> +    return u_arg1 < u_arg2 ? arg1 : arg2;
> +}
> +
> +#define MSA_BINOP_IMM_DF(helper, func)                                  \
> +void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
> +                        uint32_t wd, uint32_t ws, int64_t u5)           \
> +{                                                                       \
> +    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
> +    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
> +    uint32_t i;                                                         \
> +                                                                        \
> +    switch (df) {                                                       \
> +    case DF_BYTE:                                                       \
> +        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
> +            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
> +        }                                                               \
> +        break;                                                          \
> +    case DF_HALF:                                                       \
> +        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
> +            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
> +        }                                                               \
> +        break;                                                          \
> +    case DF_WORD:                                                       \
> +        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
> +            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
> +        }                                                               \
> +        break;                                                          \
> +    case DF_DOUBLE:                                                     \
> +        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
> +            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
> +        }                                                               \

Have you checked whether the compiler is actually able to effectively
optimise this lot?

If not, I bet it'd have a much better chance if the inline functions
above were defined as macros, e.g.:

#define msa_min_s_df(TYPE_S, TYPE_U, ARG1, ARG2)        \
        ((TYPE_S)(ARG1) < (TYPE_S)(ARG2) ? (ARG1) : (ARG2))
#define msa_min_u_df(TYPE_S, TYPE_U, ARG1, ARG2)        \
        ((TYPE_U)(ARG1) < (TYPE_U)(ARG2) ? (ARG1) : (ARG2))

and called like this:

pwd->b[i] = msa_##func##_df(int8_t, uint8_t, pws->b[i], u5);

Having said that, I don't think it should block this patchset, but
certainly something it'd be nice to improve.

> +void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
> +                       uint32_t s10)
> +{
> +    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
> +    int64_t s64 = ((int64_t)s10 << 54) >> 54;

Extending to 32-bits (int32_t) would work just as well.

> +    uint32_t i;
> +
> +    switch (df) {
> +    case DF_BYTE:
> +        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
> +            pwd->b[i] = (int8_t)s10;
> +        }
> +        break;
> +    case DF_HALF:
> +        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
> +            pwd->h[i] = (int16_t)s64;
> +        }
> +        break;
> +    case DF_WORD:
> +        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
> +            pwd->w[i] = (int32_t)s64;
> +        }
> +        break;
> +    case DF_DOUBLE:
> +        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
> +            pwd->d[i] = s64;
> +        }
> +       break;
> +    default:
> +        assert(0);
> +    }
> +}
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index b2934d7..62dd0b9 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -17395,6 +17395,81 @@ static void gen_msa_i8(CPUMIPSState *env, 
> DisasContext *ctx)
>      tcg_temp_free_i32(ti8);
>  }
>  
> +static void gen_msa_i5(CPUMIPSState *env, DisasContext *ctx)
> +{
> +#define MASK_MSA_I5(op)    (MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
> +    uint32_t opcode = ctx->opcode;
> +
> +    uint8_t df = (ctx->opcode >> 21) & 0x3;
> +    int64_t s5 = (ctx->opcode >> 16) & 0x1f;
> +    s5 = (s5 << 59) >> 59; /* sign extend s5 to 64 bits*/
> +    uint8_t u5 = (ctx->opcode >> 16) & 0x1f;
> +    uint8_t ws = (ctx->opcode >> 11) & 0x1f;
> +    uint8_t wd = (ctx->opcode >> 6) & 0x1f;
> +
> +    TCGv_i32 tdf = tcg_const_i32(df);
> +    TCGv_i32 twd = tcg_const_i32(wd);
> +    TCGv_i32 tws = tcg_const_i32(ws);
> +    TCGv_i64 tu5 = tcg_const_i64(u5);
> +    TCGv_i64 ts5 = tcg_const_i64(s5);

these (and the in64_t s5 above) don't really need to be 64bit since the
field is only 5 bits wide.

> +
> +    switch (MASK_MSA_I5(opcode)) {
> +    case OPC_ADDVI_df:
> +        gen_helper_msa_addvi_df(cpu_env, tdf, twd, tws, tu5);

Since you know df at translation time, this could avoid the whole switch
in each helper every time one is executed by having a helper for each
df.

Again, for another patchset perhaps.

> +        break;

> +    case OPC_LDI_df:
> +        {
> +            int64_t s10 = (ctx->opcode >> 11) & 0x3ff;
> +            s10 = (s10 << 54) >> 54; /* sign extend s10 to 64 bits*/

why use int64_t when you then put it in a const_i32?

> +
> +            TCGv_i32 ts10 = tcg_const_i32(s10);
> +            gen_helper_msa_ldi_df(cpu_env, tdf, twd, ts10);
> +            tcg_temp_free_i32(ts10);
> +        }

Cheers
James



reply via email to

[Prev in Thread] Current Thread [Next in Thread]