qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RFC 00/16] TCG indirect registers


From: Max Filippov
Subject: Re: [Qemu-devel] [RFC 00/16] TCG indirect registers
Date: Sun, 22 Sep 2013 21:28:59 +0400
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130805 Thunderbird/17.0.8

On Fri, Sep 20, 2013 at 1:24 AM, Richard Henderson <address@hidden> wrote:
> This is an attempt to improve performance of target-sparc
> by exposing the windowed registers as TCG globals, and all
> the optimization that we can do there.
>
> This is done via allowing tcg_global_mem_new to be used
> with any base pointer, not just off of a fixed register.
> Thus the sparc windowed registers are globals off cpu_regwptr.
>
> In the process of working through this, I attempt to remove
> as many uses of "int" as I can throughout the TCG code gen
> paths, replacing them with TCGReg when we're talking about
> hard registers, and TCGTemp pointers when we're talking about
> temporaries.  This, IMO, reduces confusion as to what kind of
> "int" we mean at any given time.
>
> By the time we get to patch 14, actually implementing the
> indirect temps, it's fairly easy to recurse in order to
> load the base pointer when we need to load or store an
> indirect temp.
>
> I've not yet tried to measure the performance.  As far as
> testing goes, linux-user-0.3 and sparc-test-0.2 work.  I've
> scanned some of the dumps from those.  In the cases where
> no real optimization was possible, we generate practically
> the same code -- usually with different registers selected.
> In the cases where we can optimize, I've seen some TBs
> cut in half.
>
> Anyway, I wanted some feedback before I take this any further.

Hi Richard,

I've reimplemented the xtensa windowed registers on top of this
series, in the same way as it is done for sparc. I haven't seen any
measurable performance change. Judging by the op,out_asm output,
most TBs got longer by 1-4 instructions and all temp indices got
doubled.

--->8---
From 73300be7dd6b3d31cbfa45225714d5e43c52f077 Mon Sep 17 00:00:00 2001
From: Max Filippov <address@hidden>
Date: Sun, 22 Sep 2013 18:54:53 +0400
Subject: [PATCH] target-xtensa: reimplement windowed registers

Signed-off-by: Max Filippov <address@hidden>
---
 target-xtensa/cpu.c       |  1 +
 target-xtensa/cpu.h       |  5 +++--
 target-xtensa/op_helper.c | 46 ++++++++++------------------------------------
 target-xtensa/translate.c |  7 +++++--
 4 files changed, 19 insertions(+), 40 deletions(-)

diff --git a/target-xtensa/cpu.c b/target-xtensa/cpu.c
index c19d17a..a30511d 100644
--- a/target-xtensa/cpu.c
+++ b/target-xtensa/cpu.c
@@ -59,6 +59,7 @@ static void xtensa_cpu_reset(CPUState *s)
     env->sregs[CACHEATTR] = 0x22222222;
     env->sregs[ATOMCTL] = xtensa_option_enabled(env->config,
             XTENSA_OPTION_ATOMCTL) ? 0x28 : 0x15;
+    rotate_window_abs(env, env->sregs[WINDOW_BASE]);
      env->pending_irq_level = 0;
     reset_mmu(env);
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 95103e9..8100f18 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -334,11 +334,11 @@ typedef struct XtensaConfigList {
  typedef struct CPUXtensaState {
     const XtensaConfig *config;
-    uint32_t regs[16];
+    uint32_t *regs;
     uint32_t pc;
     uint32_t sregs[256];
     uint32_t uregs[256];
-    uint32_t phys_regs[MAX_NAREG];
+    uint32_t phys_regs[MAX_NAREG + 12];
     float32 fregs[16];
     float_status fp_status;
 @@ -396,6 +396,7 @@ void xtensa_timer_irq(CPUXtensaState *env, uint32_t id, uint32_t active);
 void xtensa_rearm_ccompare_timer(CPUXtensaState *env);
 int cpu_xtensa_signal_handler(int host_signum, void *pinfo, void *puc);
 void xtensa_cpu_list(FILE *f, fprintf_function cpu_fprintf);
+void rotate_window_abs(CPUXtensaState *env, uint32_t position);
 void xtensa_sync_window_from_phys(CPUXtensaState *env);
 void xtensa_sync_phys_from_window(CPUXtensaState *env);
 uint32_t xtensa_tlb_get_addr_mask(const CPUXtensaState *env, bool dtlb, uint32_t way);
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index cf97025..ee21550 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -166,39 +166,6 @@ uint32_t HELPER(nsau)(uint32_t v)
     return v ? clz32(v) : 32;
 }
 -static void copy_window_from_phys(CPUXtensaState *env,
-        uint32_t window, uint32_t phys, uint32_t n)
-{
-    assert(phys < env->config->nareg);
-    if (phys + n <= env->config->nareg) {
-        memcpy(env->regs + window, env->phys_regs + phys,
-                n * sizeof(uint32_t));
-    } else {
-        uint32_t n1 = env->config->nareg - phys;
-        memcpy(env->regs + window, env->phys_regs + phys,
-                n1 * sizeof(uint32_t));
-        memcpy(env->regs + window + n1, env->phys_regs,
-                (n - n1) * sizeof(uint32_t));
-    }
-}
-
-static void copy_phys_from_window(CPUXtensaState *env,
-        uint32_t phys, uint32_t window, uint32_t n)
-{
-    assert(phys < env->config->nareg);
-    if (phys + n <= env->config->nareg) {
-        memcpy(env->phys_regs + phys, env->regs + window,
-                n * sizeof(uint32_t));
-    } else {
-        uint32_t n1 = env->config->nareg - phys;
-        memcpy(env->phys_regs + phys, env->regs + window,
-                n1 * sizeof(uint32_t));
-        memcpy(env->phys_regs, env->regs + window + n1,
-                (n - n1) * sizeof(uint32_t));
-    }
-}
-
-
 static inline unsigned windowbase_bound(unsigned a, const CPUXtensaState *env)
 {
     return a & (env->config->nareg / 4 - 1);
@@ -211,18 +178,25 @@ static inline unsigned windowstart_bit(unsigned a, const CPUXtensaState *env)
  void xtensa_sync_window_from_phys(CPUXtensaState *env)
 {
-    copy_window_from_phys(env, 0, env->sregs[WINDOW_BASE] * 4, 16);
+    if (env->sregs[WINDOW_BASE] * 4 + 16 > env->config->nareg)
+        memcpy(env->phys_regs + env->config->nareg, env->phys_regs,
+                (env->sregs[WINDOW_BASE] * 4 + 16 - env->config->nareg) *
+                sizeof(uint32_t));
 }
  void xtensa_sync_phys_from_window(CPUXtensaState *env)
 {
-    copy_phys_from_window(env, env->sregs[WINDOW_BASE] * 4, 0, 16);
+    if (env->sregs[WINDOW_BASE] * 4 + 16 > env->config->nareg)
+        memcpy(env->phys_regs, env->phys_regs + env->config->nareg,
+                (env->sregs[WINDOW_BASE] * 4 + 16 - env->config->nareg) *
+                sizeof(uint32_t));
 }
 -static void rotate_window_abs(CPUXtensaState *env, uint32_t position)
+void rotate_window_abs(CPUXtensaState *env, uint32_t position)
 {
     xtensa_sync_phys_from_window(env);
     env->sregs[WINDOW_BASE] = windowbase_bound(position, env);
+    env->regs = env->phys_regs + env->sregs[WINDOW_BASE] * 4;
     xtensa_sync_window_from_phys(env);
 }
 diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index bb7dfd0..61be622 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -70,6 +70,7 @@ typedef struct DisasContext {
 } DisasContext;
  static TCGv_ptr cpu_env;
+static TCGv_ptr cpu_regs;
 static TCGv_i32 cpu_pc;
 static TCGv_i32 cpu_R[16];
 static TCGv_i32 cpu_FR[16];
@@ -208,12 +209,14 @@ void xtensa_translate_init(void)
     int i;
      cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
+    cpu_regs = tcg_global_mem_new_ptr(cpu_env,
+            offsetof(CPUXtensaState, regs), "regs");
     cpu_pc = tcg_global_mem_new_i32(cpu_env,
             offsetof(CPUXtensaState, pc), "pc");
      for (i = 0; i < 16; i++) {
-        cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
-                offsetof(CPUXtensaState, regs[i]),
+        cpu_R[i] = tcg_global_mem_new_i32(cpu_regs,
+                i * sizeof(uint32_t),
                 regnames[i]);
     }
 -- 1.8.1.4


-- 
Thanks.
-- Max



reply via email to

[Prev in Thread] Current Thread [Next in Thread]