[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 5/5] tcg/i386: Use SHLX/SHRX/SARX instructions
From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH 5/5] tcg/i386: Use SHLX/SHRX/SARX instructions
Date: Sun, 16 Feb 2014 19:12:11 +0100
User-agent: Mutt/1.5.21 (2010-09-15)
On Fri, Jan 31, 2014 at 08:43:38AM -0600, Richard Henderson wrote:
> These three-operand shift instructions do not require the shift count
> to be placed into ECX. This reduces the number of mov insns required,
> with the mere addition of a new register constraint.
>
> Don't attempt to get rid of the matching constraint, as that's impossible
> to manipulate with just a new constraint. In addition, constant shifts
> still need the matching constraint.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> tcg/i386/tcg-target.c | 61 +++++++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 50 insertions(+), 11 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 4f6b9c1..fef1717 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -133,6 +133,12 @@ static bool have_movbe;
> it there. Therefore we always define the variable. */
> bool have_bmi1;
>
> +#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
> +static bool have_bmi2;
> +#else
> +# define have_bmi2 0
> +#endif
> +
> static uint8_t *tb_ret_addr;
>
> static void patch_reloc(uint8_t *code_ptr, int type,
> @@ -175,6 +181,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
> tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
> break;
> case 'c':
> + case_c:
> ct->ct |= TCG_CT_REG;
> tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
> break;
> @@ -203,6 +210,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
> tcg_regset_set32(ct->u.regs, 0, 0xf);
> break;
> case 'r':
> + case_r:
> ct->ct |= TCG_CT_REG;
> if (TCG_TARGET_REG_BITS == 64) {
> tcg_regset_set32(ct->u.regs, 0, 0xffff);
> @@ -210,6 +218,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
> tcg_regset_set32(ct->u.regs, 0, 0xff);
> }
> break;
> + case 'C':
> + /* With SHRX et al, we need not use ECX as shift count register. */
> + if (have_bmi2) {
> + goto case_r;
> + } else {
> + goto case_c;
> + }
>
> /* qemu_ld/st address constraint */
> case 'L':
> @@ -283,6 +298,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
> # define P_REXB_RM 0
> # define P_GS 0
> #endif
> +#define P_SIMDF3 0x10000 /* 0xf3 opcode prefix */
> +#define P_SIMDF2 0x20000 /* 0xf2 opcode prefix */
>
> #define OPC_ARITH_EvIz (0x81)
> #define OPC_ARITH_EvIb (0x83)
> @@ -325,6 +342,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
> #define OPC_SHIFT_1 (0xd1)
> #define OPC_SHIFT_Ib (0xc1)
> #define OPC_SHIFT_cl (0xd3)
> +#define OPC_SARX (0xf7 | P_EXT38 | P_SIMDF3)
> +#define OPC_SHLX (0xf7 | P_EXT38 | P_DATA16)
> +#define OPC_SHRX (0xf7 | P_EXT38 | P_SIMDF2)
> #define OPC_TESTL (0x85)
> #define OPC_XCHG_ax_r32 (0x90)
>
> @@ -493,7 +513,14 @@ static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
>
> tmp = (r & 8 ? 0 : 0x80); /* VEX.R */
> }
> - tmp |= (opc & P_DATA16 ? 1 : 0); /* VEX.pp */
> + /* VEX.pp */
> + if (opc & P_DATA16) {
> + tmp |= 1; /* 0x66 */
> + } else if (opc & P_SIMDF3) {
> + tmp |= 2; /* 0xf3 */
> + } else if (opc & P_SIMDF2) {
> + tmp |= 3; /* 0xf2 */
> + }
> tmp |= (~v & 15) << 3; /* VEX.vvvv */
> tcg_out8(s, tmp);
> tcg_out8(s, opc);
> @@ -1689,7 +1716,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
> static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
> const TCGArg *args, const int *const_args)
> {
> - int c, rexw = 0;
> + int c, vexop, rexw = 0;
>
> #if TCG_TARGET_REG_BITS == 64
> # define OP_32_64(x) \
> @@ -1860,19 +1887,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>
> OP_32_64(shl):
> c = SHIFT_SHL;
> - goto gen_shift;
> + vexop = OPC_SHLX;
> + goto gen_shift_maybe_vex;
> OP_32_64(shr):
> c = SHIFT_SHR;
> - goto gen_shift;
> + vexop = OPC_SHRX;
> + goto gen_shift_maybe_vex;
> OP_32_64(sar):
> c = SHIFT_SAR;
> - goto gen_shift;
> + vexop = OPC_SARX;
> + goto gen_shift_maybe_vex;
> OP_32_64(rotl):
> c = SHIFT_ROL;
> goto gen_shift;
> OP_32_64(rotr):
> c = SHIFT_ROR;
> goto gen_shift;
> + gen_shift_maybe_vex:
> + if (have_bmi2 && !const_args[2]) {
> + tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
> + break;
> + }
> + /* FALLTHRU */
> gen_shift:
> if (const_args[2]) {
> tcg_out_shifti(s, c + rexw, args[0], args[2]);
> @@ -2065,9 +2101,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
> { INDEX_op_xor_i32, { "r", "0", "ri" } },
> { INDEX_op_andc_i32, { "r", "r", "ri" } },
>
> - { INDEX_op_shl_i32, { "r", "0", "ci" } },
> - { INDEX_op_shr_i32, { "r", "0", "ci" } },
> - { INDEX_op_sar_i32, { "r", "0", "ci" } },
> + { INDEX_op_shl_i32, { "r", "0", "Ci" } },
> + { INDEX_op_shr_i32, { "r", "0", "Ci" } },
> + { INDEX_op_sar_i32, { "r", "0", "Ci" } },
> { INDEX_op_rotl_i32, { "r", "0", "ci" } },
> { INDEX_op_rotr_i32, { "r", "0", "ci" } },
>
> @@ -2123,9 +2159,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
> { INDEX_op_xor_i64, { "r", "0", "re" } },
> { INDEX_op_andc_i64, { "r", "r", "rI" } },
>
> - { INDEX_op_shl_i64, { "r", "0", "ci" } },
> - { INDEX_op_shr_i64, { "r", "0", "ci" } },
> - { INDEX_op_sar_i64, { "r", "0", "ci" } },
> + { INDEX_op_shl_i64, { "r", "0", "Ci" } },
> + { INDEX_op_shr_i64, { "r", "0", "Ci" } },
> + { INDEX_op_sar_i64, { "r", "0", "Ci" } },
> { INDEX_op_rotl_i64, { "r", "0", "ci" } },
> { INDEX_op_rotr_i64, { "r", "0", "ci" } },
>
> @@ -2283,6 +2319,9 @@ static void tcg_target_init(TCGContext *s)
> #ifdef bit_BMI
> have_bmi1 = (b & bit_BMI) != 0;
> #endif
> +#ifndef have_bmi2
> + have_bmi2 = (b & bit_BMI2) != 0;
> +#endif
> }
>
> if (TCG_TARGET_REG_BITS == 64) {
Reviewed-by: Aurelien Jarno <address@hidden>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net