qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 07/10] tb hash: hash phys_pc, pc, and flags with xxhash


From: Emilio G. Cota
Subject: Re: [Qemu-devel] [PATCH 07/10] tb hash: hash phys_pc, pc, and flags with xxhash
Date: Tue, 5 Apr 2016 20:52:39 -0400
User-agent: Mutt/1.5.23 (2014-03-12)

On Tue, Apr 05, 2016 at 14:08:13 -0700, Richard Henderson wrote:
> But the point is that we can do better than dropping data into memory.
> Particularly for those hosts that do not support unaligned data, such as you
> created with the packed structure.

If we made sure the fields in the struct were in the right order
(larger fields first), this shouldn't be an issue.

Anyway I took your proposal and implemented the patch below.
FWIW, I cannot measure a performance difference between this and the packed
struct for arm-softmmu (i.e. 16 bytes) on an x86_64 host.

How does the appended patch look?

Thanks,

                E.


commit af92a0690f49172621cd8b80759e3ca567d43567
Author: Emilio G. Cota <address@hidden>
Date:   Tue Apr 5 18:06:21 2016 -0400

    rth
    
    Signed-off-by: Emilio G. Cota <address@hidden>

diff --git a/include/exec/tb-hash.h b/include/exec/tb-hash.h
index 6b97a7c..349a856 100644
--- a/include/exec/tb-hash.h
+++ b/include/exec/tb-hash.h
@@ -45,19 +45,124 @@ static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
            | (tmp & TB_JMP_ADDR_MASK));
 }
 
-static inline
-uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, int flags)
+static inline uint32_t h32_finish(uint32_t h32)
 {
-    struct {
-        tb_page_addr_t phys_pc;
-        target_ulong pc;
-        int flags;
-    } QEMU_PACKED k;
-
-    k.phys_pc = phys_pc;
-    k.pc = pc;
-    k.flags = flags;
-    return qemu_xxh32((uint32_t *)&k, sizeof(k) / sizeof(uint32_t), 1);
+    h32 ^= h32 >> 15;
+    h32 *= PRIME32_2;
+    h32 ^= h32 >> 13;
+    h32 *= PRIME32_3;
+    h32 ^= h32 >> 16;
+
+    return h32;
+}
+
+static inline uint32_t tb_hash_func3(uint32_t a, uint32_t b, uint32_t c, int seed)
+{
+    uint32_t h32 = seed + PRIME32_5;
+
+    h32 += 12;
+
+    h32 += a * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    h32 += b * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    h32 += c * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    return h32_finish(h32);
+}
+
+static inline uint32_t tb_hash_func4(uint64_t a0, uint32_t c, uint32_t d, int seed)
+{
+    uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+    uint32_t v2 = seed + PRIME32_2;
+    uint32_t v3 = seed + 0;
+    uint32_t v4 = seed - PRIME32_1;
+    uint32_t a = a0 >> 31 >> 1;
+    uint32_t b = a0;
+    uint32_t h32;
+
+    v1 += a * PRIME32_2;
+    v1 = XXH_rotl32(v1, 13);
+    v1 *= PRIME32_1;
+
+    v2 += b * PRIME32_2;
+    v2 = XXH_rotl32(v2, 13);
+    v2 *= PRIME32_1;
+
+    v3 += c * PRIME32_2;
+    v3 = XXH_rotl32(v3, 13);
+    v3 *= PRIME32_1;
+
+    v4 += d * PRIME32_2;
+    v4 = XXH_rotl32(v4, 13);
+    v4 *= PRIME32_1;
+
+    h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) +
+          XXH_rotl32(v4, 18);
+    h32 += 16;
+
+    return h32_finish(h32);
+}
+
+static inline uint32_t tb_hash_func5(uint64_t a0, uint64_t b0, uint32_t e, int seed)
+{
+    uint32_t v1 = seed + PRIME32_1 + PRIME32_2;
+    uint32_t v2 = seed + PRIME32_2;
+    uint32_t v3 = seed + 0;
+    uint32_t v4 = seed - PRIME32_1;
+    uint32_t a = a0 >> 31 >> 1;
+    uint32_t b = a0;
+    uint32_t c = b0 >> 31 >> 1;
+    uint32_t d = b0;
+    uint32_t h32;
+
+    v1 += a * PRIME32_2;
+    v1 = XXH_rotl32(v1, 13);
+    v1 *= PRIME32_1;
+
+    v2 += b * PRIME32_2;
+    v2 = XXH_rotl32(v2, 13);
+    v2 *= PRIME32_1;
+
+    v3 += c * PRIME32_2;
+    v3 = XXH_rotl32(v3, 13);
+    v3 *= PRIME32_1;
+
+    v4 += d * PRIME32_2;
+    v4 = XXH_rotl32(v4, 13);
+    v4 *= PRIME32_1;
+
+    h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) +
+          XXH_rotl32(v4, 18);
+    h32 += 20;
+
+    h32 += e * PRIME32_3;
+    h32  = XXH_rotl32(h32, 17) * PRIME32_4;
+
+    return h32_finish(h32);
+}
+
+static __attribute__((noinline))
+unsigned tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc, int flags)
+{
+#if TARGET_LONG_BITS == 64
+
+    if (sizeof(phys_pc) == sizeof(pc)) {
+        return tb_hash_func5(phys_pc, pc, flags, 1);
+    }
+    return tb_hash_func4(pc, phys_pc, flags, 1);
+
+#else /* 32-bit target */
+
+    if (sizeof(phys_pc) > sizeof(pc)) {
+        return tb_hash_func4(phys_pc, pc, flags, 1);
+    }
+    return tb_hash_func3(pc, phys_pc, flags, 1);
+
+#endif /* TARGET_LONG_BITS */
 }
 
 #endif



reply via email to

[Prev in Thread] Current Thread [Next in Thread]