qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH for-2.5] tcg/i386: use softmmu fast path for unaligned accesses


From: Aurelien Jarno
Subject: [Qemu-devel] [PATCH for-2.5] tcg/i386: use softmmu fast path for unaligned accesses
Date: Thu, 9 Jul 2015 20:39:57 +0200

Softmmu unaligned loads/stores currently go through the slow
path for two reasons:
  - to support unaligned accesses on hosts with strict alignment
  - to correctly handle accesses crossing pages

Only the second reason applies to x86. Unaligned accesses are
avoided by compilers, but are not uncommon. We would therefore like
them to go through the fast path when they don't cross pages.

For that we can use the fact that two adjacent TLB entries can't contain
the same page. Therefore accessing the TLB entry corresponding to the
first byte, but comparing its content to the page address of the last byte
ensures that we don't cross pages. We can do this check without adding
more instructions in the TLB code (but increasing its length by one
byte) by using the LEA instruction to combine the existing move with the
size addition.

On an x86-64 host, this gives a 3% boot time improvement for a powerpc
guest and 4% for an x86-64 guest.

Cc: Paolo Bonzini <address@hidden>
Cc: Richard Henderson <address@hidden>
Signed-off-by: Aurelien Jarno <address@hidden>
---
 tcg/i386/tcg-target.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index ff4d9cf..f952645 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1172,14 +1172,16 @@ static void * const qemu_st_helpers[16] = {
    First argument register is clobbered.  */
 
 static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg 
addrhi,
-                                    int mem_index, TCGMemOp s_bits,
+                                    int mem_index, TCGMemOp opc,
                                     tcg_insn_unit **label_ptr, int which)
 {
     const TCGReg r0 = TCG_REG_L0;
     const TCGReg r1 = TCG_REG_L1;
+    TCGMemOp s_bits = opc & MO_SIZE;
     TCGType ttype = TCG_TYPE_I32;
     TCGType htype = TCG_TYPE_I32;
     int trexw = 0, hrexw = 0;
+    bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_bits == 0;
 
     if (TCG_TARGET_REG_BITS == 64) {
         if (TARGET_LONG_BITS == 64) {
@@ -1193,13 +1195,20 @@ static inline void tcg_out_tlb_load(TCGContext *s, 
TCGReg addrlo, TCGReg addrhi,
     }
 
     tcg_out_mov(s, htype, r0, addrlo);
-    tcg_out_mov(s, ttype, r1, addrlo);
+    if (aligned) {
+        tcg_out_mov(s, ttype, r1, addrlo);
+    } else {
+        /* For unaligned access check that we don't cross pages using
+           the page address of the last byte.  */
+        tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1,
+                                 0, (1 << s_bits) - 1);
+    }
 
     tcg_out_shifti(s, SHIFT_SHR + hrexw, r0,
                    TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
     tgen_arithi(s, ARITH_AND + trexw, r1,
-                TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+                TARGET_PAGE_MASK | (aligned ? ((1 << s_bits) - 1) : 0), 0);
     tgen_arithi(s, ARITH_AND + hrexw, r0,
                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
 
@@ -1534,7 +1543,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is64)
     TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
-    TCGMemOp s_bits;
     tcg_insn_unit *label_ptr[2];
 #endif
 
@@ -1547,9 +1555,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is64)
 
 #if defined(CONFIG_SOFTMMU)
     mem_index = get_mmuidx(oi);
-    s_bits = opc & MO_SIZE;
 
-    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
@@ -1667,7 +1674,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args, bool is64)
     TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
     int mem_index;
-    TCGMemOp s_bits;
     tcg_insn_unit *label_ptr[2];
 #endif
 
@@ -1680,9 +1686,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args, bool is64)
 
 #if defined(CONFIG_SOFTMMU)
     mem_index = get_mmuidx(oi);
-    s_bits = opc & MO_SIZE;
 
-    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-- 
2.1.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]