commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] r8964 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter


From: eb
Subject: [Commit-gnuradio] r8964 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter
Date: Mon, 21 Jul 2008 16:03:04 -0600 (MDT)

Author: eb
Date: 2008-07-21 16:03:03 -0600 (Mon, 21 Jul 2008)
New Revision: 8964

Modified:
   gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc
   gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/qa_gr_fir_fff.cc
Log:
work-in-progress on altivec

Modified: gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc
===================================================================
--- gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc	2008-07-21 21:42:29 UTC (rev 8963)
+++ gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_vmx.cc	2008-07-21 22:03:03 UTC (rev 8964)
@@ -26,9 +26,11 @@
 #include <altivec.h>
 #include <stdlib.h>
 #include <stdexcept>
+#include <assert.h>
 
 
-static const unsigned FLOATS_PER_VEC = 4;
+static const size_t VS = sizeof(vector float);
+static const size_t FLOATS_PER_VEC = 4;
 
 union v_float_u {
   vector float v;
@@ -94,17 +96,16 @@
 #if 0
 
 float
-dotprod_fff_vmx(const float *a, const float *b, unsigned n)
+dotprod_fff_vmx(const float *a, const float *b, size_t n)
 {
   float        sum = 0;
-  for (unsigned int i = 0; i < n; i++){
+  for (size_t i = 0; i < n; i++){
     sum += a[i] * b[i];
   }
   return sum;
 }
 
-#else
-
+#elif 0
 /*
  *  preconditions:
  *
@@ -113,11 +114,11 @@
  *    b  4-byte aligned
  */
 float
-dotprod_fff_vmx(const float *a, const float *b, unsigned n)
+dotprod_fff_vmx(const float *a, const float *b, size_t n)
 {
-  static const unsigned FLOATS_PER_LOOP = 1 * FLOATS_PER_VEC;
+  static const size_t FLOATS_PER_LOOP = 1 * FLOATS_PER_VEC;
 
-  unsigned loop_cnt = n / FLOATS_PER_LOOP;
+  size_t loop_cnt = n / FLOATS_PER_LOOP;
   vector float acc0 = {0, 0, 0, 0};
 
   vector unsigned char lvsl_a;
@@ -136,7 +137,7 @@
   msq_b0 = vec_ld(0, b);
   b += FLOATS_PER_VEC;
   
-  for (unsigned i = 0; i < loop_cnt; i++){
+  for (size_t i = 0; i < loop_cnt; i++){
     lsq_a0 = vec_ld(0, a);
     lsq_b0 = vec_ld(0, b);
     a += FLOATS_PER_VEC;
@@ -153,6 +154,115 @@
 
   return horizontal_add_f(acc0);
 }
+
+#else
+/*
+ *  preconditions:
+ *
+ *    n > 0 and a multiple of 4
+ *    a   4-byte aligned
+ *    b  16-byte aligned
+ */
+float
+dotprod_fff_vmx(const float *_a, const float *_b, size_t n)
+{
+  const vector float *a = (const vector float *) _a;
+  const vector float *b = (const vector float *) _b;
+
+  static const size_t UNROLL_CNT = 4;
+
+  size_t loop_cnt = n / (UNROLL_CNT * FLOATS_PER_VEC);
+  size_t nleft = n % (UNROLL_CNT * FLOATS_PER_VEC);
+
+  // printf("n = %zd, loop_cnt = %zd, nleft = %zd\n", n, loop_cnt, nleft);
+
+  // Used with vperm to build a* from p*
+  vector unsigned char lvsl_a = vec_lvsl(0, _a);
+
+  vector float p0, p1, p2, p3;
+  vector float a0, a1, a2, a3;
+  vector float b0, b1, b2, b3;
+  vector float acc0 = {0, 0, 0, 0};
+  vector float acc1 = {0, 0, 0, 0};
+  vector float acc2 = {0, 0, 0, 0};
+  vector float acc3 = {0, 0, 0, 0};
+
+  // wind in
+
+  p0 = vec_ld(0*VS, a);
+  p1 = vec_ld(1*VS, a);
+  p2 = vec_ld(2*VS, a);
+  p3 = vec_ld(3*VS, a);
+
+  a0 = vec_perm(p0, p1, lvsl_a);
+  b0 = vec_ld(0*VS, b);
+  p0 = vec_ld((UNROLL_CNT + 0)*VS, a);
+
+  for (size_t i = 0; i < loop_cnt; i++){
+
+    a1 = vec_perm(p1, p2, lvsl_a);
+    b1 = vec_ld(1*VS, b);
+    p1 = vec_ld((UNROLL_CNT + 1)*VS, a);
+    acc0 = vec_madd(a0, b0, acc0);
+
+    a2 = vec_perm(p2, p3, lvsl_a);
+    b2 = vec_ld(2*VS, b);
+    p2 = vec_ld((UNROLL_CNT + 2)*VS, a);
+    acc1 = vec_madd(a1, b1, acc1);
+
+    a3 = vec_perm(p3, p0, lvsl_a);
+    b3 = vec_ld(3*VS, b);
+    p3 = vec_ld((UNROLL_CNT + 3)*VS, a);
+    acc2 = vec_madd(a2, b2, acc2);
+
+    a += UNROLL_CNT;
+    b += UNROLL_CNT;
+
+    a0 = vec_perm(p0, p1, lvsl_a);
+    b0 = vec_ld(0*VS, b);
+    p0 = vec_ld((UNROLL_CNT + 0)*VS, a);
+    acc3 = vec_madd(a3, b3, acc3);
+  }
+
+  assert((nleft % 4) == 0);
+
+  switch (nleft/4){
+  case 0:
+    break;
+    
+  case 1:
+    acc0 = vec_madd(a0, b0, acc0);
+    break;
+
+  case 2:
+    a1 = vec_perm(p1, p2, lvsl_a);
+    b1 = vec_ld(1*VS, b);
+    acc0 = vec_madd(a0, b0, acc0);
+    acc1 = vec_madd(a1, b1, acc1);
+    break;
+
+  case 3:
+    a1 = vec_perm(p1, p2, lvsl_a);
+    b1 = vec_ld(1*VS, b);
+    acc0 = vec_madd(a0, b0, acc0);
+    a2 = vec_perm(p2, p3, lvsl_a);
+    b2 = vec_ld(2*VS, b);
+    acc1 = vec_madd(a1, b1, acc1);
+    acc2 = vec_madd(a2, b2, acc2);
+    break;
+
+  default:
+    assert(0);
+    break;
+  }
+           
+  acc0 = acc0 + acc1;
+  acc2 = acc2 + acc3;
+  acc0 = acc0 + acc2;
+
+  return horizontal_add_f(acc0);
+}
+
 #endif
 
 gr_fir_fff_vmx::gr_fir_fff_vmx()

Modified: gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/qa_gr_fir_fff.cc
===================================================================
--- gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/qa_gr_fir_fff.cc	2008-07-21 21:42:29 UTC (rev 8963)
+++ gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/qa_gr_fir_fff.cc	2008-07-21 22:03:03 UTC (rev 8964)
@@ -143,7 +143,7 @@
 static void
 test_random_io (fir_maker_t maker)  
 {
-  const int    MAX_TAPS        = 9;
+  const int    MAX_TAPS        = 32;
   const int    OUTPUT_LEN      = 17;
   const int    INPUT_LEN       = MAX_TAPS + OUTPUT_LEN;
 
@@ -187,7 +187,7 @@
       
       for (int o = 0; o < ol; o++){
        CPPUNIT_ASSERT_DOUBLES_EQUAL (expected_output[o], actual_output[o],
-                           fabs (expected_output[o]) * 1e-4);
+                                     fabs (expected_output[o]) * 5e-3);
       }
 
       delete f1;





reply via email to

[Prev in Thread] Current Thread [Next in Thread]