qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PULL 10/10] tcg: Further optimizations for add2 and sub2_i32


From: Richard Henderson
Subject: [Qemu-devel] [PULL 10/10] tcg: Further optimizations for add2 and sub2_i32
Date: Fri, 9 Jan 2015 13:23:20 -0800

Notice when the low parts of the operation cannot cause overflow,
and thus reduce the high part to a simple add/sub.

Signed-off-by: Richard Henderson <address@hidden>
---
 tcg/optimize.c | 83 +++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 21 deletions(-)

diff --git a/tcg/optimize.c b/tcg/optimize.c
index c674fe2..bf3dc60 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -1129,33 +1129,74 @@ static void tcg_constant_folding(TCGContext *s)
         case INDEX_op_add2_i32:
         case INDEX_op_sub2_i32:
             if (temps[args[2]].state == TCG_TEMP_CONST
-                && temps[args[3]].state == TCG_TEMP_CONST
-                && temps[args[4]].state == TCG_TEMP_CONST
-                && temps[args[5]].state == TCG_TEMP_CONST) {
+                && temps[args[4]].state == TCG_TEMP_CONST) {
                 uint32_t al = temps[args[2]].val;
-                uint32_t ah = temps[args[3]].val;
                 uint32_t bl = temps[args[4]].val;
-                uint32_t bh = temps[args[5]].val;
-                uint64_t a = ((uint64_t)ah << 32) | al;
-                uint64_t b = ((uint64_t)bh << 32) | bl;
-                TCGArg rl, rh;
-                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
-                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
 
-                if (opc == INDEX_op_add2_i32) {
-                    a += b;
-                } else {
-                    a -= b;
+                if (temps[args[3]].state == TCG_TEMP_CONST
+                    && temps[args[5]].state == TCG_TEMP_CONST) {
+                    /* The entire 64-bit quantity is a constant.  */
+                    uint32_t ah = temps[args[3]].val;
+                    uint32_t bh = temps[args[5]].val;
+                    uint64_t a = ((uint64_t)ah << 32) | al;
+                    uint64_t b = ((uint64_t)bh << 32) | bl;
+                    TCGArg rl, rh;
+                    TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                    TCGArg *args2 = &s->gen_opparam_buf[op2->args];
+
+                    if (opc == INDEX_op_add2_i32) {
+                        a += b;
+                    } else {
+                        a -= b;
+                    }
+
+                    rl = args[0];
+                    rh = args[1];
+                    tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
+                    tcg_opt_gen_movi(s, op2, args2, opc, rh,
+                                     (uint32_t)(a >> 32));
+
+                    /* We've done all we need to do with the movi.  Skip it.  */
+                    oi_next = op2->next;
+                    break;
                 }
+                if (opc == INDEX_op_add2_i32 ? al + bl >= al : al >= bl) {
+                    /* The low part of the operation is constant,
+                       and does not produce a carry/borrow.  */
+                    TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
+                    TCGArg *args2 = &s->gen_opparam_buf[op2->args];
+
+                    if (opc == INDEX_op_add2_i32) {
+                        al += bl;
+                    } else {
+                        al -= bl;
+                    }
 
-                rl = args[0];
-                rh = args[1];
-                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
-                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32));
+                    tcg_opt_gen_movi(s, op2, args2, opc, args[0], al);
+                do_addsub2_high:
+                    if (opc == INDEX_op_add2_i32) {
+                        op->opc = INDEX_op_add_i32;
+                    } else {
+                        op->opc = INDEX_op_sub_i32;
+                    }
+                    args[0] = args[1];
+                    args[1] = args[3];
+                    args[2] = args[5];
 
-                /* We've done all we need to do with the movi.  Skip it.  */
-                oi_next = op2->next;
-                break;
+                    /* We may be able to simplify the new op further.  */
+                    break;
+                }
+            }
+            if (temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[4]].val == 0
+                && args[3] != args[0] && args[5] != args[0]) {
+                /* The second low part of the operation is zero,
+                   and thus cannot produce a carry/borrow.  */
+                TCGOp *op2 = insert_op_before(s, op, INDEX_op_mov_i32, 2);
+                TCGArg *args2 = &s->gen_opparam_buf[op2->args];
+
+                tcg_opt_gen_mov(s, op2, args2, opc, args[0], args[2]);
+                goto do_addsub2_high;
             }
             goto do_default;
 
-- 
2.1.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]