commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 06/09: volk/rotator: Fix the renormalization process (missing sqrt)


From: git
Subject: [Commit-gnuradio] [gnuradio] 06/09: volk/rotator: Fix the renormalization process (missing sqrt)
Date: Tue, 28 Jan 2014 20:10:56 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch maint
in repository gnuradio.

commit d2f86d664cb9536fe595cd55fb7eb32ebde56a2f
Author: Sylvain Munaut <address@hidden>
Date:   Tue Jan 28 14:06:25 2014 +0100

    volk/rotator: Fix the renormalization process (missing sqrt)
    
    Note: We use _mm_sqrt_ps followed by _mm_div_ps rather than the
          faster _mm_rsqrt_ps followed by _mm_mul_ps. It's slower, but it
          avoids numerical errors that show up clearly on the spectrum
          as an image at up to -80 dBc. With this approach, the image
          is at -120 dBc.
    
          This is in the renormalization process anyway, so the cost is
          spread over a fair number of samples.
    
    Signed-off-by: Sylvain Munaut <address@hidden>
---
 volk/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/volk/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h b/volk/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
index 84fe39f..be70ab3 100644
--- a/volk/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
+++ b/volk/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
@@ -100,7 +100,8 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* outVector
         tmp1 = _mm_mul_ps(phase_Val, phase_Val);
         tmp2 = _mm_hadd_ps(tmp1, tmp1);
         tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
-        phase_Val = _mm_div_ps(phase_Val, tmp1);
+        tmp2 = _mm_sqrt_ps(tmp1);
+        phase_Val = _mm_div_ps(phase_Val, tmp2);
     }
     for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
         aVal = _mm_load_ps((float*)aPtr);
@@ -207,7 +208,8 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* outVector
         tmp1 = _mm_mul_ps(phase_Val, phase_Val);
         tmp2 = _mm_hadd_ps(tmp1, tmp1);
         tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
-        phase_Val = _mm_div_ps(phase_Val, tmp1);
+        tmp2 = _mm_sqrt_ps(tmp1);
+        phase_Val = _mm_div_ps(phase_Val, tmp2);
     }
     for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
         aVal = _mm_loadu_ps((float*)aPtr);
@@ -313,7 +315,8 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, c
         tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
         tmp2 = _mm256_hadd_ps(tmp1, tmp1);
         tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
-        phase_Val = _mm256_div_ps(phase_Val, tmp1);
+        tmp2 = _mm256_sqrt_ps(tmp1);
+        phase_Val = _mm256_div_ps(phase_Val, tmp2);
     }
     for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
         aVal = _mm256_load_ps((float*)aPtr);
@@ -419,7 +422,8 @@ static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector, c
         tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
         tmp2 = _mm256_hadd_ps(tmp1, tmp1);
         tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
-        phase_Val = _mm256_div_ps(phase_Val, tmp1);
+        tmp2 = _mm256_sqrt_ps(tmp1);
+        phase_Val = _mm256_div_ps(phase_Val, tmp2);
     }
     for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
         aVal = _mm256_loadu_ps((float*)aPtr);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]