qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v5] cutils: Rewrite x86 buffer zero checking


From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH v5] cutils: Rewrite x86 buffer zero checking
Date: Wed, 14 Sep 2016 01:21:41 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:45.0) Gecko/20100101 Thunderbird/45.2.0


On 13/09/2016 22:57, Richard Henderson wrote:
> -#if defined(CONFIG_AVX2_OPT) || (defined(CONFIG_CPUID_H) && 
> defined(__SSE2__))
> -#include <cpuid.h>
> -
> +#if (defined(CONFIG_AVX2_OPT) && defined(CONFIG_CPUID_H)) || 
> defined(__SSE2__)

Your __SSE2__ version is better than mine which required cpuid.h just to
simplify the logic a bit.  On the other hand, CONFIG_CPUID_H is not
needed in CONFIG_AVX2_OPT, because the test already requires cpuid.h.

> +#ifdef CONFIG_CPUID_H
> +# define INIT_CACHE
> +# define INIT_ACCEL
> +#else
> +# ifndef __SSE2__
> +#  error "ISA selection confusion"
> +# endif
> +# define INIT_CACHE  = CACHE_SSE2
> +# define INIT_ACCEL  = buffer_zero_sse2
>  #endif

This is ugly, any reason not to initialize INIT_CACHE/INIT_ACCEL to
respectively 0 and NULL, or 0 and buffer_zero_int in the #ifdef
CONFIG_CPUID_H case?

> -#define CACHE_AVX2    2
> -#define CACHE_AVX1    4
> -#define CACHE_SSE4    8
> -#define CACHE_SSE2    16
> +static unsigned cpuid_cache INIT_CACHE;
> +static bool (*buffer_accel)(const void *, size_t) INIT_ACCEL;
>  
> -static unsigned cpuid_cache;
> +#undef INIT_CACHE
> +#undef INIT_ACCEL

The #undef is not really necessary since this file hardly has anything
after the toplevel #endif.

Just tell me which changes you agree with, I can make them locally.

Paolo

> +static void init_accel(unsigned cache)
> +{
> +    bool (*fn)(const void *, size_t) = buffer_zero_int;
> +    if (cache & CACHE_SSE2) {
> +        fn = buffer_zero_sse2;
> +    }
> +#ifdef CONFIG_AVX2_OPT
> +    if (cache & CACHE_SSE4) {
> +        fn = buffer_zero_sse4;
> +    }
> +    if (cache & CACHE_AVX2) {
> +        fn = buffer_zero_avx2;
> +    }
> +#endif
> +    buffer_accel = fn;
> +}
> +
> +#ifdef CONFIG_CPUID_H
> +#include <cpuid.h>
>  static void __attribute__((constructor)) init_cpuid_cache(void)
>  {
>      int max = __get_cpuid_max(0, NULL);
> @@ -145,24 +245,23 @@ static void __attribute__((constructor)) 
> init_cpuid_cache(void)
>              cache |= CACHE_SSE4;
>          }
>  
> +#ifdef bit_AVX2
>          /* We must check that AVX is not just available, but usable.  */
> -        if ((c & bit_OSXSAVE) && (c & bit_AVX)) {
> -            __asm("xgetbv" : "=a"(a), "=d"(d) : "c"(0));
> -            if ((a & 6) == 6) {
> -                cache |= CACHE_AVX1;
> -                if (max >= 7) {
> -                    __cpuid_count(7, 0, a, b, c, d);
> -                    if (b & bit_AVX2) {
> -                        cache |= CACHE_AVX2;
> -                    }
> -                }
> +        if ((c & bit_OSXSAVE) && (c & bit_AVX) && max >= 7) {
> +            int bv;
> +            __asm("xgetbv" : "=a"(bv), "=d"(d) : "c"(0));
> +            __cpuid_count(7, 0, a, b, c, d);
> +            if ((bv & 6) == 6 && (b & bit_AVX2)) {
> +                cache |= CACHE_AVX2;
>              }
>          }
> +#endif
>      }
>      cpuid_cache = cache;
> +    init_accel(cache);
>  }
> +#endif /* CONFIG_CPUID_H */
>  
> -#define HAVE_NEXT_ACCEL
>  bool test_buffer_is_zero_next_accel(void)
>  {
>      /* If no bits set, we just tested buffer_zero_int, and there
> @@ -172,31 +271,20 @@ bool test_buffer_is_zero_next_accel(void)
>      }
>      /* Disable the accelerator we used before and select a new one.  */
>      cpuid_cache &= cpuid_cache - 1;
> +    init_accel(cpuid_cache);
>      return true;
>  }
>  
>  static bool select_accel_fn(const void *buf, size_t len)
>  {
> -    uintptr_t ibuf = (uintptr_t)buf;
> -#ifdef CONFIG_AVX2_OPT
> -    if (len % 128 == 0 && ibuf % 32 == 0 && (cpuid_cache & CACHE_AVX2)) {
> -        return buffer_zero_avx2(buf, len);
> -    }
> -    if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE4)) {
> -        return buffer_zero_sse4(buf, len);
> -    }
> -#endif
> -    if (len % 64 == 0 && ibuf % 16 == 0 && (cpuid_cache & CACHE_SSE2)) {
> -        return buffer_zero_sse2(buf, len);
> +    if (likely(len >= 64)) {
> +        return buffer_accel(buf, len);
>      }
>      return buffer_zero_int(buf, len);
>  }
>  
>  #else
>  #define select_accel_fn  buffer_zero_int
> -#endif
> -
> -#ifndef HAVE_NEXT_ACCEL
>  bool test_buffer_is_zero_next_accel(void)
>  {
>      return false;
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]