[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v3] tcg: Optimize fence instructions
From: |
Pranith Kumar |
Subject: |
[Qemu-devel] [PATCH v3] tcg: Optimize fence instructions |
Date: |
Tue, 23 Aug 2016 09:48:25 -0400 |
This commit optimizes fence instructions. Two optimizations are
currently implemented. These are:
1. Removing unnecessary duplicate fence instructions
If the same fence instruction appears twice in a row, we remove
one instance of it.
ex: mb; mb => mb, strl; strl => strl
2. Merging a weaker fence with a subsequent/previous stronger fence
A load-acquire/store-release fence can be combined with a full fence
without relaxing the ordering constraint.
ex: a) ld; ldaq; mb => ld; mb
b) mb; strl; st => mb; st
Signed-off-by: Pranith Kumar <address@hidden>
---
tcg/optimize.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
tcg/tcg.c | 4 ++++
tcg/tcg.h | 1 +
3 files changed, 79 insertions(+)
v3:
- Update with rth's feedback
v2:
- skip non-memory instructions while merging fences
diff --git a/tcg/optimize.c b/tcg/optimize.c
index cffe89b..827f2d0 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -538,6 +538,80 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
return false;
}
+/*
+ * Eliminate duplicate and unnecessary fence (INDEX_op_mb) instructions.
+ *
+ * Walks the op list of @s once, remembering the most recent barrier that has
+ * not yet been followed by a qemu_ld/qemu_st op, a call, or an op flagged
+ * TCG_OPF_BB_END.  When another barrier is reached while one is remembered,
+ * the two are merged according to their types:
+ *
+ *   same type, or SC followed by STRL  -> drop the current barrier
+ *   LDAQ followed by SC                -> drop the previous barrier
+ *   LDAQ followed by STRL              -> drop the previous barrier and
+ *                                         promote the current one to SC
+ *   anything else                      -> keep both
+ *
+ * args[0] of a barrier carries its type in the 0xF0 nibble (TCG_BAR_*) and,
+ * presumably, the TCG_MO_* access-ordering mask in the low nibble (to be
+ * confirmed against the TCGBar definition).  Whenever one barrier absorbs
+ * another, the low nibble of the removed barrier is OR-ed into the survivor
+ * so that the merged barrier never orders fewer accesses than the pair it
+ * replaces.
+ */
+void tcg_optimize_mb(TCGContext *s)
+{
+    int oi, oi_next;
+    TCGOp *prev_op = NULL;   /* last barrier still eligible for merging */
+
+    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
+        TCGOp *op = &s->gen_op_buf[oi];
+        TCGArg *args = &s->gen_opparam_buf[op->args];
+        TCGOpcode opc = op->opc;
+
+        /* Fetch the successor before op can be removed from the list, so
+         * the walk never depends on the contents of a removed op.
+         */
+        oi_next = op->next;
+
+        switch (opc) {
+        case INDEX_op_mb:
+            if (prev_op != NULL) {
+                TCGArg *prev_args = &s->gen_opparam_buf[prev_op->args];
+                TCGBar curr_mb_type = args[0] & 0xF0;
+                TCGBar prev_mb_type = prev_args[0] & 0xF0;
+
+                if (curr_mb_type == prev_mb_type ||
+                    (curr_mb_type == TCG_BAR_STRL &&
+                     prev_mb_type == TCG_BAR_SC)) {
+                    /* Remove the current barrier op.  The previous barrier
+                     * is at least as strong once it also carries the
+                     * current barrier's access mask.
+                     *   mb; strl => mb; st
+                     */
+                    prev_args[0] |= args[0] & 0x0F;
+                    tcg_op_remove(s, op);
+                    /* prev_op stays the tracked barrier. */
+                    break;
+                }
+
+                if (curr_mb_type == TCG_BAR_SC &&
+                    prev_mb_type == TCG_BAR_LDAQ) {
+                    /* Remove the previous weaker barrier op.  The current
+                     * barrier is stronger and inherits the previous access
+                     * mask.
+                     *   ldaq; mb => ld; mb
+                     */
+                    args[0] |= prev_args[0] & 0x0F;
+                    tcg_op_remove(s, prev_op);
+                } else if (curr_mb_type == TCG_BAR_STRL &&
+                           prev_mb_type == TCG_BAR_LDAQ) {
+                    /* Consecutive load-acquire and store-release barriers
+                     * are merged into one stronger SC barrier covering all
+                     * access kinds.
+                     *   ldaq; strl => ld; mb; st
+                     */
+                    args[0] = TCG_BAR_SC | TCG_MO_ALL;
+                    tcg_op_remove(s, prev_op);
+                }
+            }
+            /* The current barrier becomes the one later fences merge into. */
+            prev_op = op;
+            break;
+        case INDEX_op_qemu_ld_i32:
+        case INDEX_op_qemu_ld_i64:
+        case INDEX_op_qemu_st_i32:
+        case INDEX_op_qemu_st_i64:
+        case INDEX_op_call:
+            /* A guest memory access or a call sits between fences: barriers
+             * on either side of it must not be merged.
+             */
+            prev_op = NULL;
+            break;
+        default:
+            /* Do not merge barriers across the end of a basic block. */
+            if (tcg_op_defs[opc].flags & TCG_OPF_BB_END) {
+                prev_op = NULL;
+            }
+            break;
+        }
+    }
+}
+
/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 42417bd..1db319e 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2587,6 +2587,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
}
}
+#ifdef USE_TCG_OPTIMIZATIONS
+ tcg_optimize_mb(s);
+#endif
+
#ifdef CONFIG_PROFILER
s->la_time += profile_getclock();
#endif
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 9ed78dc..79bb5bb 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -921,6 +921,7 @@ void tcg_op_remove(TCGContext *s, TCGOp *op);
TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
+void tcg_optimize_mb(TCGContext *s);
void tcg_optimize(TCGContext *s);
/* only used for debugging purposes */
--
2.9.3
- [Qemu-devel] [PATCH v3] tcg: Optimize fence instructions,
Pranith Kumar <=