qemu-arm
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-arm] [PATCH 04/10] target/i386: optimize cross-page block chaining in softmmu


From: Emilio G. Cota
Subject: [Qemu-arm] [PATCH 04/10] target/i386: optimize cross-page block chaining in softmmu
Date: Tue, 11 Apr 2017 21:17:24 -0400

Instead of unconditionally exiting to the exec loop, add a helper to
check whether the target TB is valid. As long as the hit rate in
tb_jmp_cache remains high, this improves performance.

Measurements:

-       specINT 2006 (test set), x86_64-softmmu. Host: Intel i7-4790K @ 4.00GHz
                          Y axis: Speedup over 95b31d70

     1.3x+-+-------------------------------------------------------------+-+
         |           cross $$                                              |
    1.25x+-+.............................................................+-+
         |                                                                 |
     1.2x+-+.............................................................+-+
         |                                                        :        |
         |                                                        :        |
    1.15x+-+.............................................................+-+
         |           $$$$ $$$$                     +++            :        |
     1.1x+-+.........$..$.$..$...........................................+-+
         |           $  $ $  $                     $$$           $$$$      |
    1.05x+-+.........$..$.$..$.....................$.$.$$$$......$..$....+-+
         |           $  $ $  $      +++  +++  +++  $+$ $++$  +++ $: $ $$$$ |
         |       +++ $  $ $  $ +++  $$$   :    :   $ $ $  $ $$$$ $: $ $++$ |
       1x+-$$$$G$$$$_$EM$_$ro$s$$$..$.$.......$$$..$.$.$..$.$..$.$..$.$..$-+
         | $++$ $ :$ $  $ $  $ $ $  $ $   :   $+$  $ $ $  $ $++$ $: $ $  $ |
    0.95x+-$..$.$..$.$..$.$..$.$.$..$.$..$$$..$.$..$.$.$..$.$..$.$..$.$..$-+
         | $  $ $  $ $  $ $  $ $ $  $ $  $:$  $ $  $ $ $  $ $  $ $  $ $  $ |
     0.9x+-$$$$-$$$$-$$$$-$$$$-$$$--$$$--$$$--$$$--$$$-$$$$-$$$$-$$$$-$$$$-+
           astarbzip2gcc gobmh264rehmlibquantumcfomneperlbensjxalancbhmean
  png: http://imgur.com/cwRnmCi

That is, a hmean gain of 2.6%.

-      specINT 2006 (train set), x86_64-softmmu. Host: Intel i7-4790K @ 4.00GHz
                          Y axis: Speedup over 95b31d70

    1.25x+-+-------------------------------------------------------------+-+
         |                cross $$                                         |
         |                                                                 |
     1.2x+-+.............................................................+-+
         |                                          :            +++       |
    1.15x+-+.............................................................+-+
         |            :                            $$$ $$$$      $$$$      |
         |           $$$$ +++                      $:$ $++$  +++ $: $      |
     1.1x+-+.........$..$.$$$$.....................$.$.$..$......$..$....+-+
         |       +++ $++$ $++$ +++   :             $ $ $  $   :  $++$ +++  |
    1.05x+-+....$$$$.$..$.$..$......$$$............$.$.$..$.$$$$.$..$.$$$$-+
         |      $++$ $  $ $  $ $$$  $:$            $ $ $  $ $ :$ $  $ $  $ |
         |      $  $ $  $ $  $ $:$  $+$  +++  +++  $ $ $  $ $ :$ $  $ $  $ |
       1x+-$$$$G$AP$_$EM$_$ro$s$i$li$e$..$$$.......$.$.$..$.$..$.$..$.$..$-+
         | $++$ $  $ $  $ $  $ $+$  $ $  $:$  $$$  $ $ $  $ $  $ $  $ $  $ |
    0.95x+-$..$.$..$.$..$.$..$.$.$..$.$..$.$..$.$..$.$.$..$.$..$.$..$.$..$-+
         | $  $ $  $ $  $ $  $ $ $  $ $  $ $  $+$  $ $ $  $ $  $ $  $ $  $ |
         | $  $ $  $ $  $ $  $ $ $  $ $  $ $  $ $  $ $ $  $ $  $ $  $ $  $ |
     0.9x+-$$$$-$$$$-$$$$-$$$$-$$$--$$$--$$$--$$$--$$$-$$$$-$$$$-$$$$-$$$$-+
           astarbzip2gcc gobmh264rehmlibquantumcfomneperlbensjxalancbhmean
  png: http://imgur.com/0CbG7dD

This is the larger "train" set. We get a hmean improvement of 6.1%.

Signed-off-by: Emilio G. Cota <address@hidden>
---
 target/i386/helper.h      |  2 ++
 target/i386/misc_helper.c |  5 +++++
 target/i386/translate.c   | 14 +++++++++++++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/target/i386/helper.h b/target/i386/helper.h
index 6fb8fb9..dceb343 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -1,6 +1,8 @@
 DEF_HELPER_FLAGS_4(cc_compute_all, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
 DEF_HELPER_FLAGS_4(cc_compute_c, TCG_CALL_NO_RWG_SE, tl, tl, tl, tl, int)
 
+DEF_HELPER_2(cross_page_check, i32, env, tl)
+
 DEF_HELPER_3(write_eflags, void, env, tl, i32)
 DEF_HELPER_1(read_eflags, tl, env)
 DEF_HELPER_2(divb_AL, void, env, tl)
diff --git a/target/i386/misc_helper.c b/target/i386/misc_helper.c
index ca2ea09..a41daed 100644
--- a/target/i386/misc_helper.c
+++ b/target/i386/misc_helper.c
@@ -637,3 +637,8 @@ void helper_wrpkru(CPUX86State *env, uint32_t ecx, uint64_t val)
     env->pkru = val;
     tlb_flush(cs);
 }
+
+uint32_t helper_cross_page_check(CPUX86State *env, target_ulong vaddr)
+{
+    return !!tb_from_jmp_cache(env, vaddr);
+}
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 1d1372f..ffc8ccc 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2153,7 +2153,19 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
         gen_jmp_im(eip);
         tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
     } else {
-        /* jump to another page: currently not optimized */
+        /* jump to another page */
+        TCGv vaddr = tcg_const_tl(eip);
+        TCGv_i32 valid = tcg_temp_new_i32();
+        TCGLabel *label = gen_new_label();
+
+        gen_helper_cross_page_check(valid, cpu_env, vaddr);
+        tcg_temp_free(vaddr);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, valid, 0, label);
+        tcg_temp_free_i32(valid);
+        tcg_gen_goto_tb(tb_num);
+        gen_jmp_im(eip);
+        tcg_gen_exit_tb((uintptr_t)s->tb + tb_num);
+        gen_set_label(label);
         gen_jmp_im(eip);
         gen_eob(s);
     }
-- 
2.7.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]