[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v4 62/64] tcg: Use ctpop to generate ctz if needed
From: Richard Henderson
Subject: [Qemu-devel] [PATCH v4 62/64] tcg: Use ctpop to generate ctz if needed
Date: Wed, 23 Nov 2016 14:01:59 +0100
When the host has no ctz instruction but does have ctpop, compute
ctz(x) as ctpop((x - 1) & ~x).
Particularly when andc is also available, this is two insns
shorter than using clz to compute ctz.
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/tcg-op.c | 107 ++++++++++++++++++++++++++++++++++++-----------------------
1 file changed, 65 insertions(+), 42 deletions(-)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 6f4b1b6..d1debde 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -497,43 +497,46 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1,
TCGv_i32 arg2)
tcg_gen_extrl_i64_i32(ret, t1);
tcg_temp_free_i64(t1);
tcg_temp_free_i64(t2);
- } else if (TCG_TARGET_HAS_clz_i32) {
- TCGv_i32 t1 = tcg_temp_new_i32();
- TCGv_i32 t2 = tcg_temp_new_i32();
- tcg_gen_neg_i32(t1, arg1);
- tcg_gen_xori_i32(t2, arg2, 31);
- tcg_gen_and_i32(t1, t1, arg1);
- tcg_gen_clz_i32(ret, t1, t2);
- tcg_temp_free_i32(t1);
- tcg_temp_free_i32(t2);
- tcg_gen_xori_i32(ret, ret, 31);
- } else if (TCG_TARGET_HAS_clz_i64) {
- TCGv_i32 t1 = tcg_temp_new_i32();
- TCGv_i32 t2 = tcg_temp_new_i32();
- TCGv_i64 x1 = tcg_temp_new_i64();
- TCGv_i64 x2 = tcg_temp_new_i64();
- tcg_gen_neg_i32(t1, arg1);
- tcg_gen_xori_i32(t2, arg2, 63);
- tcg_gen_and_i32(t1, t1, arg1);
- tcg_gen_extu_i32_i64(x1, t1);
- tcg_gen_extu_i32_i64(x2, t2);
- tcg_temp_free_i32(t1);
- tcg_temp_free_i32(t2);
- tcg_gen_clz_i64(x1, x1, x2);
- tcg_gen_extrl_i64_i32(ret, x1);
- tcg_temp_free_i64(x1);
- tcg_temp_free_i64(x2);
- tcg_gen_xori_i32(ret, ret, 63);
} else {
- gen_helper_ctz_i32(ret, arg1, arg2);
+ TCGv_i32 z, t;
+ if (TCG_TARGET_HAS_ctpop_i32 && TCG_TARGET_HAS_andc_i32) {
+ t = tcg_temp_new_i32();
+ tcg_gen_subi_i32(t, arg1, 1);
+ tcg_gen_andc_i32(t, t, arg1);
+ tcg_gen_ctpop_i32(t, t);
+ do_movc:
+ z = tcg_const_i32(0);
+ tcg_gen_movcond_i32(TCG_COND_EQ, ret, arg1, z, arg2, t);
+ tcg_temp_free_i32(t);
+ tcg_temp_free_i32(z);
+ } else if (TCG_TARGET_HAS_clz_i32 || TCG_TARGET_HAS_clz_i64) {
+ /* Since all non-x86 hosts have clz(0) == 32, don't fight it. */
+ t = tcg_temp_new_i32();
+ tcg_gen_neg_i32(t, arg1);
+ tcg_gen_and_i32(t, t, arg1);
+ tcg_gen_clzi_i32(t, t, 32);
+ tcg_gen_xori_i32(t, t, 31);
+ goto do_movc;
+ } else {
+ gen_helper_ctz_i32(ret, arg1, arg2);
+ }
}
}
void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
{
- TCGv_i32 t = tcg_const_i32(arg2);
- tcg_gen_ctz_i32(ret, arg1, t);
- tcg_temp_free_i32(t);
+ if (!TCG_TARGET_HAS_ctz_i32 && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
+ /* This equivalence has the advantage of not requiring a fixup. */
+ TCGv_i32 t = tcg_temp_new_i32();
+ tcg_gen_subi_i32(t, arg1, 1);
+ tcg_gen_andc_i32(t, t, arg1);
+ tcg_gen_ctpop_i32(ret, t);
+ tcg_temp_free_i32(t);
+ } else {
+ TCGv_i32 t = tcg_const_i32(arg2);
+ tcg_gen_ctz_i32(ret, arg1, t);
+ tcg_temp_free_i32(t);
+ }
}
void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
@@ -1842,18 +1845,29 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1,
TCGv_i64 arg2)
{
if (TCG_TARGET_HAS_ctz_i64) {
tcg_gen_op3_i64(INDEX_op_ctz_i64, ret, arg1, arg2);
- } else if (TCG_TARGET_HAS_clz_i64) {
- TCGv_i64 t1 = tcg_temp_new_i64();
- TCGv_i64 t2 = tcg_temp_new_i64();
- tcg_gen_neg_i64(t1, arg1);
- tcg_gen_xori_i64(t2, arg2, 63);
- tcg_gen_and_i64(t1, t1, arg1);
- tcg_gen_clz_i64(ret, t1, t2);
- tcg_temp_free_i64(t1);
- tcg_temp_free_i64(t2);
- tcg_gen_xori_i64(ret, ret, 63);
} else {
- gen_helper_ctz_i64(ret, arg1, arg2);
+ TCGv_i64 z, t;
+ if (TCG_TARGET_HAS_ctpop_i64 && TCG_TARGET_HAS_andc_i64) {
+ t = tcg_temp_new_i64();
+ tcg_gen_subi_i64(t, arg1, 1);
+ tcg_gen_andc_i64(t, t, arg1);
+ tcg_gen_ctpop_i64(t, t);
+ do_movc:
+ z = tcg_const_i64(0);
+ tcg_gen_movcond_i64(TCG_COND_EQ, ret, arg1, z, arg2, t);
+ tcg_temp_free_i64(t);
+ tcg_temp_free_i64(z);
+ } else if (TCG_TARGET_HAS_clz_i64) {
+ /* Since all non-x86 hosts have clz(0) == 64, don't fight it. */
+ t = tcg_temp_new_i64();
+ tcg_gen_neg_i64(t, arg1);
+ tcg_gen_and_i64(t, t, arg1);
+ tcg_gen_clzi_i64(t, t, 64);
+ tcg_gen_xori_i64(t, t, 63);
+ goto do_movc;
+ } else {
+ gen_helper_ctz_i64(ret, arg1, arg2);
+ }
}
}
@@ -1868,6 +1882,15 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1,
uint64_t arg2)
tcg_gen_ctz_i32(TCGV_LOW(ret), TCGV_LOW(arg1), t32);
tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
tcg_temp_free_i32(t32);
+ } else if (!TCG_TARGET_HAS_ctz_i64
+ && TCG_TARGET_HAS_ctpop_i64
+ && arg2 == 64) {
+ /* This equivalence has the advantage of not requiring a fixup. */
+ TCGv_i64 t = tcg_temp_new_i64();
+ tcg_gen_subi_i64(t, arg1, 1);
+ tcg_gen_andc_i64(t, t, arg1);
+ tcg_gen_ctpop_i64(ret, t);
+ tcg_temp_free_i64(t);
} else {
TCGv_i64 t64 = tcg_const_i64(arg2);
tcg_gen_ctz_i64(ret, arg1, t64);
--
2.7.4
- [Qemu-devel] [PATCH v4 53/64] target-xtensa: Use clrsb helper, (continued)
- [Qemu-devel] [PATCH v4 53/64] target-xtensa: Use clrsb helper, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 54/64] tcg: Add opcode for ctpop, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 55/64] target-alpha: Use ctpop helper, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 56/64] target-ppc: Use ctpop helper, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 57/64] target-s390x: Avoid a loop for popcnt, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 58/64] target-sparc: Use ctpop helper, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 59/64] target-tilegx: Use ctpop helper, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 60/64] target-i386: Use ctpop helper, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 61/64] qemu/host-utils.h: Reduce the operation count in the fallback ctpop, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 63/64] tcg/ppc: Handle ctpop opcode, Richard Henderson, 2016/11/23
- [Qemu-devel] [PATCH v4 62/64] tcg: Use ctpop to generate ctz if needed, Richard Henderson <=
- [Qemu-devel] [PATCH v4 64/64] tcg/i386: Handle ctpop opcode, Richard Henderson, 2016/11/23
- Re: [Qemu-devel] [PATCH v4 00/64] tcg 2.9 patch queue, no-reply, 2016/11/29