[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 51/62] tcg-s390: Conditionalize AND IMMEDIATE instructions.
From: |
Richard Henderson |
Subject: |
[Qemu-devel] [PATCH 51/62] tcg-s390: Conditionalize AND IMMEDIATE instructions. |
Date: |
Thu, 27 May 2010 13:46:33 -0700 |
The 32-bit immediate AND instructions are in the extended-immediate
facility. Use these only if present.
At the same time, pull the logic to load immediates into registers
into a constraint letter for TCG.
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/s390/tcg-target.c | 209 ++++++++++++++++++++++++++++--------------------
1 files changed, 122 insertions(+), 87 deletions(-)
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 8a7c9ae..359f6d1 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -33,10 +33,11 @@
do { } while (0)
#endif
-#define TCG_CT_CONST_32 0x100
-#define TCG_CT_CONST_NEG 0x200
-#define TCG_CT_CONST_ADDI 0x400
-#define TCG_CT_CONST_MULI 0x800
+#define TCG_CT_CONST_32 0x0100
+#define TCG_CT_CONST_NEG 0x0200
+#define TCG_CT_CONST_ADDI 0x0400
+#define TCG_CT_CONST_MULI 0x0800
+#define TCG_CT_CONST_ANDI 0x1000
#define TCG_TMP0 TCG_REG_R14
@@ -353,6 +354,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
ct->ct &= ~TCG_CT_REG;
ct->ct |= TCG_CT_CONST_MULI;
break;
+ case 'A':
+ ct->ct &= ~TCG_CT_REG;
+ ct->ct |= TCG_CT_CONST_ANDI;
+ break;
default:
break;
}
@@ -362,9 +367,66 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
return 0;
}
+/* Immediates to be used with logical AND. This is an optimization only,
+ since a full 64-bit immediate AND can always be performed with 4 sequential
+ NI[LH][LH] instructions. What we're looking for is immediates that we
+ can load efficiently, and the immediate load plus the reg-reg AND is
+ smaller than the sequential NI's. */
+
+static int tcg_match_andi(int ct, tcg_target_ulong val)
+{
+ int i;
+
+ if (facilities & FACILITY_EXT_IMM) {
+ if (ct & TCG_CT_CONST_32) {
+ /* All 32-bit ANDs can be performed with 1 48-bit insn. */
+ return 1;
+ }
+
+ /* Zero-extensions. */
+ if (val == 0xff || val == 0xffff || val == 0xffffffff) {
+ return 1;
+ }
+ } else {
+ if (ct & TCG_CT_CONST_32) {
+ val = (uint32_t)val;
+ } else if (val == 0xffffffff) {
+ return 1;
+ }
+ }
+
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = ~(0xffffull << i*16);
+ if ((val & mask) == mask) {
+ return 1;
+ }
+ }
+
+ /* Look for 16-bit values performing the mask. These are better
+ to load with LLI[LH][LH]. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = 0xffffull << i*16;
+ if ((val & mask) == val) {
+ return 0;
+ }
+ }
+
+ /* Look for 32-bit values performing the 64-bit mask. These
+ are better to load with LLI[LH]F, or if extended immediates
+ not available, with a pair of LLI insns. */
+ if ((ct & TCG_CT_CONST_32) == 0) {
+ if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
/* Test if a constant matches the constraint. */
-static inline int tcg_target_const_match(tcg_target_long val,
- const TCGArgConstraint *arg_ct)
+static int tcg_target_const_match(tcg_target_long val,
+ const TCGArgConstraint *arg_ct)
{
int ct = arg_ct->ct;
@@ -401,6 +463,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
} else {
return val == (int16_t)val;
}
+ } else if (ct & TCG_CT_CONST_ANDI) {
+ return tcg_match_andi(ct, val);
}
return 0;
@@ -764,37 +828,6 @@ static void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
}
-static void tgen32_andi(TCGContext *s, TCGReg dest, uint32_t val)
-{
- /* Zero-th, look for no-op. */
- if (val == -1) {
- return;
- }
-
- /* First, look for the zero-extensions. */
- if (val == 0xff) {
- tgen_ext8u(s, dest, dest);
- return;
- }
- if (val == 0xffff) {
- tgen_ext16u(s, dest, dest);
- return;
- }
-
- /* Second, try all 32-bit insns that can perform it in one go. */
- if ((val & 0xffff0000) == 0xffff0000) {
- tcg_out_insn(s, RI, NILL, dest, val);
- return;
- }
- if ((val & 0x0000ffff) == 0x0000ffff) {
- tcg_out_insn(s, RI, NILH, dest, val >> 16);
- return;
- }
-
- /* Lastly, perform the entire operation with a 48-bit insn. */
- tcg_out_insn(s, RIL, NILF, dest, val);
-}
-
static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
{
static const S390Opcode ni_insns[4] = {
@@ -806,69 +839,61 @@ static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
int i;
- /* Zero-th, look for no-op. */
+ /* Look for no-op. */
if (val == -1) {
return;
}
- /* First, look for the zero-extensions. */
- if (val == 0xff) {
- tgen_ext8u(s, dest, dest);
- return;
- }
- if (val == 0xffff) {
- tgen_ext16u(s, dest, dest);
- return;
- }
+ /* Look for the zero-extensions. */
if (val == 0xffffffff) {
tgen_ext32u(s, dest, dest);
return;
}
- /* Second, try all 32-bit insns that can perform it in one go. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = ~(0xffffull << i*16);
- if ((val & mask) == mask) {
- tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ if (facilities & FACILITY_EXT_IMM) {
+ if (val == 0xff) {
+ tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
return;
}
- }
-
- /* Third, try all 48-bit insns that can perform it in one go. */
- for (i = 0; i < 2; i++) {
- tcg_target_ulong mask = ~(0xffffffffull << i*32);
- if ((val & mask) == mask) {
- tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+ if (val == 0xffff) {
+ tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
return;
}
- }
- /* Fourth, look for masks that can be loaded with one instruction
- into a register. This is slightly smaller than using two 48-bit
- masks, as below. */
- for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = ~(0xffffull << i*16);
- if ((val & mask) == 0) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
- tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
- return;
+ /* Try all 32-bit insns that can perform it in one go. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = ~(0xffffull << i*16);
+ if ((val & mask) == mask) {
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ return;
+ }
}
- }
- for (i = 0; i < 2; i++) {
- tcg_target_ulong mask = ~(0xffffffffull << i*32);
- if ((val & mask) == 0) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
- tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
- return;
+ /* Try all 48-bit insns that can perform it in one go. */
+ if (facilities & FACILITY_EXT_IMM) {
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = ~(0xffffffffull << i*32);
+ if ((val & mask) == mask) {
+ tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+ return;
+ }
+ }
}
- }
- /* Last, perform the AND via sequential modifications to the
- high and low parts. Do this via recursion to handle 16-bit
- vs 32-bit masks in each half. */
- tgen64_andi(s, dest, val | 0xffffffff00000000ull);
- tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+ /* Perform the AND via sequential modifications to the high and low
+ parts. Do this via recursion to handle 16-bit vs 32-bit masks in
+ each half. */
+ tgen64_andi(s, dest, val | 0xffffffff00000000ull);
+ tgen64_andi(s, dest, val | 0x00000000ffffffffull);
+ } else {
+ /* With no extended-immediate facility, just emit the sequence. */
+ for (i = 0; i < 4; i++) {
+ tcg_target_ulong mask = 0xffffull << i*16;
+ if ((val & mask) != mask) {
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ }
+ }
+ }
}
static void tgen64_ori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
@@ -1121,6 +1146,16 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data,
}
#if defined(CONFIG_SOFTMMU)
+static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{
+ if (tcg_match_andi(0, val)) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);
+ tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
+ } else {
+ tgen64_andi(s, dest, val);
+ }
+}
+
static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
TCGReg addr_reg, int mem_index, int opc,
uint16_t **label2_ptr_p, int is_store)
@@ -1140,8 +1175,8 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, SH64_REG_NONE,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tgen64_andi(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
- tgen64_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+ tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+ tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
if (is_store) {
ofs = offsetof(CPUState, tlb_table[mem_index][0].addr_write);
@@ -1413,7 +1448,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_and_i32:
if (const_args[2]) {
- tgen32_andi(s, args[0], args[2]);
+ tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
} else {
tcg_out_insn(s, RR, NR, args[0], args[2]);
}
@@ -1728,7 +1763,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
{ INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
- { INDEX_op_and_i32, { "r", "0", "ri" } },
+ { INDEX_op_and_i32, { "r", "0", "rWA" } },
{ INDEX_op_or_i32, { "r", "0", "ri" } },
{ INDEX_op_xor_i32, { "r", "0", "ri" } },
{ INDEX_op_neg_i32, { "r", "r" } },
@@ -1789,7 +1824,7 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
{ INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
- { INDEX_op_and_i64, { "r", "0", "ri" } },
+ { INDEX_op_and_i64, { "r", "0", "rA" } },
{ INDEX_op_or_i64, { "r", "0", "ri" } },
{ INDEX_op_xor_i64, { "r", "0", "ri" } },
{ INDEX_op_neg_i64, { "r", "r" } },
--
1.7.0.1
- [Qemu-devel] [PATCH 41/62] tcg-s390: Allocate the code_gen_buffer near the main program., (continued)
- [Qemu-devel] [PATCH 41/62] tcg-s390: Allocate the code_gen_buffer near the main program., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 42/62] tcg-s390: Rearrange qemu_ld/st to avoid register copy., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 44/62] tcg-s390: Tidy user qemu_ld/st., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 45/62] tcg-s390: Implement GUEST_BASE., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 43/62] tcg-s390: Tidy tcg_prepare_qemu_ldst., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 47/62] tcg-s390: Conditionalize general-instruction-extension insns., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 48/62] tcg-s390: Conditionalize ADD IMMEDIATE instructions., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 46/62] tcg-s390: Query instruction extensions that are installed., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 50/62] tcg-s390: Conditionalize 8 and 16 bit extensions., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 49/62] tcg-s390: Conditionalize LOAD IMMEDIATE instructions., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 51/62] tcg-s390: Conditionalize AND IMMEDIATE instructions.,
Richard Henderson <=
- [Qemu-devel] [PATCH 52/62] tcg-s390: Conditionalize OR IMMEDIATE instructions., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 53/62] tcg-s390: Conditionalize XOR IMMEDIATE instructions., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 54/62] tcg-s390: Do not require the extended-immediate facility., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 55/62] tcg-s390: Use 16-bit branches for forward jumps., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 56/62] tcg-s390: Use the LOAD AND TEST instruction for compares., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 57/62] tcg-s390: Use the COMPARE IMMEDIATE instrucions for compares., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 58/62] tcg-s390: Use COMPARE AND BRANCH instructions., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 61/62] tcg-s390: Enable compile in 32-bit mode., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 59/62] tcg-s390: Generalize load/store support., Richard Henderson, 2010/05/27
- [Qemu-devel] [PATCH 60/62] tcg-s390: Fix TLB comparison width., Richard Henderson, 2010/05/27