qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 13/13] tcg: rework TCG ops flags


From: Aurelien Jarno
Subject: [Qemu-devel] [PATCH 13/13] tcg: rework TCG ops flags
Date: Thu, 27 Sep 2012 19:15:13 +0200

TCG_OPF_CALL_CLOBBER means that an op is clobbering the call registers,
and thus they should be emptied before the op is executed. It doesn't
says anything about globals.

On the other hand, TCG_OPF_SIDE_EFFECTS means that the op might touch
some globals or even affects some non map registers. As a consequence
such an op should not be removed even if its output arguments are not
used, and the globals should be saved back to memory.

Unfortunately the two seems to be wrongly used: saving globals is
decided depending on TCG_OPF_CALL_CLOBBER, while non-qemu store
functions are marked as TCG_OPF_SIDE_EFFECTS. This is why mapping a
memory address using a global and accessing it through ld/st doesn't
work.

While theoretically it could be possible to use PURE and CONST like
for helpers, it only refers to globals and not about other things like
exception. This patch reworks that in the following way:
- TCG_OPF_CALL_CLOBBER: The op clobber the call registers. They are
  freed before emitting the op.
- TCG_OPF_SIDE_EFFECTS: The op is not removed if the returned value
  if not used. It can trigger exception and thus globals are
  synchronized before emitting the op.
- TCG_OPF_READ_GLOBALS: The op can read globals but not write them,
  and thus globals are synchronized before emitting the op.
- TCG_OPF_WRITE_GLOBALS: The op can read and write globals, and thus
  globals are saved back to their canonical location before emitting
  the op.

This patch also update the flags for the affected instructions.
Compared to the previous behaviour, here are the changes:
- Globals are only synchronized and not saved back to memory for
  qemu_ld/st helpers. This significantly reduces the generated code
  size.
- Mapping a memory address using a global and accessing it through
  ld/st now does work, but at a cost of a small increase of the
  generated code size.

Signed-off-by: Aurelien Jarno <address@hidden>
---
 tcg/tcg-opc.h |   38 +++++++++++++++++++-------------------
 tcg/tcg.c     |   26 +++++++++++++++++---------
 tcg/tcg.h     |   16 ++++++++++------
 3 files changed, 46 insertions(+), 34 deletions(-)

Note: While this is restoring the possibility to map a memory address
using both a global and accessing it through ld/st, this reduces the
performances of targets generating a lot of ld/st op. Given it is 
currently technically not allowed, we might instead change the TCG 
documentation to make that clear.

diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 0ffd44f..aeccebb 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -53,14 +53,14 @@ DEF(movi_i32, 1, 0, 1, 0)
 DEF(setcond_i32, 1, 2, 1, 0)
 DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
 /* load/store */
-DEF(ld8u_i32, 1, 1, 1, 0)
-DEF(ld8s_i32, 1, 1, 1, 0)
-DEF(ld16u_i32, 1, 1, 1, 0)
-DEF(ld16s_i32, 1, 1, 1, 0)
-DEF(ld_i32, 1, 1, 1, 0)
-DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(ld8u_i32, 1, 1, 1, TCG_OPF_READ_GLOBALS)
+DEF(ld8s_i32, 1, 1, 1, TCG_OPF_READ_GLOBALS)
+DEF(ld16u_i32, 1, 1, 1, TCG_OPF_READ_GLOBALS)
+DEF(ld16s_i32, 1, 1, 1, TCG_OPF_READ_GLOBALS)
+DEF(ld_i32, 1, 1, 1, TCG_OPF_READ_GLOBALS)
+DEF(st8_i32, 0, 2, 1, TCG_OPF_WRITE_GLOBALS)
+DEF(st16_i32, 0, 2, 1, TCG_OPF_WRITE_GLOBALS)
+DEF(st_i32, 0, 2, 1, TCG_OPF_WRITE_GLOBALS)
 /* arith */
 DEF(add_i32, 1, 2, 0, 0)
 DEF(sub_i32, 1, 2, 0, 0)
@@ -109,17 +109,17 @@ DEF(movi_i64, 1, 0, 1, IMPL64)
 DEF(setcond_i64, 1, 2, 1, IMPL64)
 DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
 /* load/store */
-DEF(ld8u_i64, 1, 1, 1, IMPL64)
-DEF(ld8s_i64, 1, 1, 1, IMPL64)
-DEF(ld16u_i64, 1, 1, 1, IMPL64)
-DEF(ld16s_i64, 1, 1, 1, IMPL64)
-DEF(ld32u_i64, 1, 1, 1, IMPL64)
-DEF(ld32s_i64, 1, 1, 1, IMPL64)
-DEF(ld_i64, 1, 1, 1, IMPL64)
-DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
+DEF(ld8u_i64, 1, 1, 1, TCG_OPF_READ_GLOBALS | IMPL64)
+DEF(ld8s_i64, 1, 1, 1, TCG_OPF_READ_GLOBALS | IMPL64)
+DEF(ld16u_i64, 1, 1, 1, TCG_OPF_READ_GLOBALS | IMPL64)
+DEF(ld16s_i64, 1, 1, 1, TCG_OPF_READ_GLOBALS | IMPL64)
+DEF(ld32u_i64, 1, 1, 1, TCG_OPF_READ_GLOBALS | IMPL64)
+DEF(ld32s_i64, 1, 1, 1, TCG_OPF_READ_GLOBALS | IMPL64)
+DEF(ld_i64, 1, 1, 1, TCG_OPF_READ_GLOBALS | IMPL64)
+DEF(st8_i64, 0, 2, 1, TCG_OPF_WRITE_GLOBALS | IMPL64)
+DEF(st16_i64, 0, 2, 1, TCG_OPF_WRITE_GLOBALS | IMPL64)
+DEF(st32_i64, 0, 2, 1, TCG_OPF_WRITE_GLOBALS | IMPL64)
+DEF(st_i64, 0, 2, 1, TCG_OPF_WRITE_GLOBALS | IMPL64)
 /* arith */
 DEF(add_i64, 1, 2, 0, IMPL64)
 DEF(sub_i64, 1, 2, 0, IMPL64)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 1c5ab81..1f36777 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1362,10 +1362,17 @@ static void tcg_liveness_analysis(TCGContext *s)
                 /* if end of basic block, update */
                 if (def->flags & TCG_OPF_BB_END) {
                     tcg_la_bb_end(s, dead_temps, mem_temps);
-                } else if (def->flags & TCG_OPF_CALL_CLOBBER) {
-                    /* globals should go back to memory */
-                    memset(dead_temps, 1, s->nb_globals);
-                    memset(mem_temps, 1, s->nb_globals);
+                } else {
+                    if (def->flags & (TCG_OPF_SIDE_EFFECTS |
+                                      TCG_OPF_READ_GLOBALS |
+                                      TCG_OPF_WRITE_GLOBALS)) {
+                        /* globals should be synced to memory */
+                        memset(mem_temps, 1, s->nb_globals);
+                    }
+                    if (def->flags & TCG_OPF_WRITE_GLOBALS) {
+                        /* globals should go back to memory */
+                        memset(dead_temps, 1, s->nb_globals);
+                    }
                 }
 
                 /* input args are live */
@@ -1883,12 +1890,13 @@ static void tcg_reg_alloc_op(TCGContext *s,
                     tcg_reg_free(s, reg);
                 }
             }
-            /* XXX: for load/store we could do that only for the slow path
-               (i.e. when a memory callback is called) */
-            
-            /* store globals and free associated registers (we assume the insn
-               can modify any global. */
+        }
+        /* Save globals if the op might write them, sync if the op might
+           read them or has side effects. */
+        if (def->flags & TCG_OPF_WRITE_GLOBALS) {
             save_globals(s, allocated_regs);
+        } else if (def->flags & (TCG_OPF_SIDE_EFFECTS | TCG_OPF_READ_GLOBALS)) 
{
+            sync_globals(s, allocated_regs);
         }
         
         /* satisfy the output constraints */
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6875bc1..a182f59 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -497,16 +497,20 @@ typedef struct TCGArgConstraint {
 /* Bits for TCGOpDef->flags, 8 bits available.  */
 enum {
     /* Instruction defines the end of a basic block.  */
-    TCG_OPF_BB_END       = 0x01,
-    /* Instruction clobbers call registers and potentially update globals.  */
-    TCG_OPF_CALL_CLOBBER = 0x02,
+    TCG_OPF_BB_END        = 0x01,
+    /* Instruction clobbers call registers.  */
+    TCG_OPF_CALL_CLOBBER  = 0x02,
     /* Instruction has side effects: it cannot be removed
        if its outputs are not used.  */
-    TCG_OPF_SIDE_EFFECTS = 0x04,
+    TCG_OPF_SIDE_EFFECTS  = 0x04,
+    /* Instruction read globals, they should be synced before.  */
+    TCG_OPF_READ_GLOBALS  = 0x08,
+    /* Instruction write globals, they should be saved back before.  */
+    TCG_OPF_WRITE_GLOBALS = 0x10,
     /* Instruction operands are 64-bits (otherwise 32-bits).  */
-    TCG_OPF_64BIT        = 0x08,
+    TCG_OPF_64BIT         = 0x10,
     /* Instruction is optional and not implemented by the host.  */
-    TCG_OPF_NOT_PRESENT  = 0x10,
+    TCG_OPF_NOT_PRESENT   = 0x20,
 };
 
 typedef struct TCGOpDef {
-- 
1.7.10.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]