commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] r8971 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter


From: eb
Subject: [Commit-gnuradio] r8971 - gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter
Date: Tue, 22 Jul 2008 00:23:06 -0600 (MDT)

Author: eb
Date: 2008-07-22 00:23:05 -0600 (Tue, 22 Jul 2008)
New Revision: 8971

Added:
   
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
   
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
Modified:
   gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am
   
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_altivec.cc
Log:
working altivec gr_fir_fff.  About 3x faster on PS3

Modified: 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am
===================================================================
--- 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am    
    2008-07-22 03:57:22 UTC (rev 8970)
+++ 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/Makefile.am    
    2008-07-22 06:23:05 UTC (rev 8971)
@@ -171,7 +171,8 @@
        gr_fir_sysconfig_powerpc.cc \
        gr_cpu_powerpc.cc \
        gr_fir_fff_altivec.cc \
-       gr_altivec.c
+       gr_altivec.c \
+       dotprod_fff_altivec.c
 
 powerpc_qa_CODE = \
        qa_dotprod_powerpc.cc
@@ -291,6 +292,7 @@
 
 noinst_HEADERS =                       \
        assembly.h                      \
+       dotprod_fff_altivec.h           \
        gr_fir_scc_simd.h               \
        gr_fir_scc_x86.h                \
        gr_fir_fcc_simd.h               \

Added: 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
===================================================================
--- 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
                              (rev 0)
+++ 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
      2008-07-22 06:23:05 UTC (rev 8971)
@@ -0,0 +1,162 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#include <dotprod_fff_altivec.h>
+#include <gr_altivec.h>
+
+/*! Clears the low bits of \p x so the result is a multiple of \p pow2.
+ * \param x any value
+ * \param pow2 must be a power of 2 (otherwise -pow2 is not a contiguous mask)
+ * \returns \p x rounded down to a multiple of \p pow2.
+ */
+static inline size_t
+gr_p2_round_down(size_t x, size_t pow2)
+{
+  return x & -pow2;  /* unsigned -pow2: all bits below pow2's single set bit are 0, the rest 1 */
+}
+
+
+#if 0
+
+float  /* plain scalar dot product of a[0..n) and b[0..n); compiled out by the enclosing #if 0 */
+dotprod_fff_altivec(const float *a, const float *b, size_t n)
+{
+  float        sum = 0;
+  for (size_t i = 0; i < n; i++){  /* scalar indexing only: every one of the n elements is used */
+    sum += a[i] * b[i];
+  }
+  return sum;  /* stays 0 when n == 0 */
+}
+
+#else
+
+/*
+ * AltiVec dot product of a[0..n) and b[0..n): 4x unrolled, 4 accumulators.
+ * Preconditions:
+ *    n > 0 and a multiple of 4 (any other n is silently rounded down)
+ *    a   4-byte aligned (realigned on the fly with vec_lvsl + vec_perm)
+ *    b  16-byte aligned (fed to vec_madd straight from vec_ld)
+ */
+float
+dotprod_fff_altivec(const float *_a, const float *_b, size_t n)
+{
+  const vector float *a = (const vector float *) _a;
+  const vector float *b = (const vector float *) _b;
+
+  static const size_t UNROLL_CNT = 4;  // vectors of a and of b consumed per loop iteration
+
+  n = gr_p2_round_down(n, 4);  // drops the trailing n % 4 elements, if any
+  size_t loop_cnt = n / (UNROLL_CNT * FLOATS_PER_VEC);  // number of full unrolled iterations
+  size_t nleft = n % (UNROLL_CNT * FLOATS_PER_VEC);  // elements left over for the tail switch
+
+  // printf("n = %zd, loop_cnt = %zd, nleft = %zd\n", n, loop_cnt, nleft);
+
+  // Permute control computed from the address _a; used with vperm to build a* from p*
+  vector unsigned char lvsl_a = vec_lvsl(0, _a);
+
+  vector float p0, p1, p2, p3;  // raw vec_ld results from a
+  vector float a0, a1, a2, a3;  // vec_perm of two neighbouring p* values
+  vector float b0, b1, b2, b3;  // vec_ld results from b, used as loaded
+  vector float acc0 = {0, 0, 0, 0};
+  vector float acc1 = {0, 0, 0, 0};
+  vector float acc2 = {0, 0, 0, 0};
+  vector float acc3 = {0, 0, 0, 0};
+
+  // wind in: these loads happen unconditionally, whatever n is.
+  // NOTE(review): VS is presumably the vector size in bytes (gr_altivec.h, not shown) -- confirm.
+  p0 = vec_ld(0*VS, a);
+  p1 = vec_ld(1*VS, a);
+  p2 = vec_ld(2*VS, a);
+  p3 = vec_ld(3*VS, a);
+  a += UNROLL_CNT;  // a now points one group ahead of the data being multiplied
+
+  a0 = vec_perm(p0, p1, lvsl_a);
+  b0 = vec_ld(0*VS, b);
+  p0 = vec_ld(0*VS, a);  // fifth vector read from a before any arithmetic is done
+
+  size_t i;
+  for (i = 0; i < loop_cnt; i++){
+
+    a1 = vec_perm(p1, p2, lvsl_a);
+    b1 = vec_ld(1*VS, b);
+    p1 = vec_ld(1*VS, a);  // a is already advanced, so this refills p1 from the next group
+    acc0 = vec_madd(a0, b0, acc0);  // a0/b0 were staged by the wind-in or the previous pass
+
+    a2 = vec_perm(p2, p3, lvsl_a);
+    b2 = vec_ld(2*VS, b);
+    p2 = vec_ld(2*VS, a);
+    acc1 = vec_madd(a1, b1, acc1);
+
+    a3 = vec_perm(p3, p0, lvsl_a);  // p0 here is the first vector of the next group
+    b3 = vec_ld(3*VS, b);
+    p3 = vec_ld(3*VS, a);
+    acc2 = vec_madd(a2, b2, acc2);
+
+    a += UNROLL_CNT;
+    b += UNROLL_CNT;
+
+    a0 = vec_perm(p0, p1, lvsl_a);  // stage a0/b0 for the next pass or for the tail below
+    b0 = vec_ld(0*VS, b);
+    p0 = vec_ld(0*VS, a);
+    acc3 = vec_madd(a3, b3, acc3);
+  }
+
+  /*
+   * Tail. The compiler ought to be able to figure out that 0, 4, 8 and 12
+   * are the only possible values for nleft (hence no default case).
+   */
+  switch (nleft){
+  case 0:
+    break;
+    
+  case 4:
+    acc0 = vec_madd(a0, b0, acc0);  // a0 and b0 are already staged; nothing more to load
+    break;
+
+  case 8:
+    a1 = vec_perm(p1, p2, lvsl_a);  // p1 and p2 were loaded by the wind-in or the last pass
+    b1 = vec_ld(1*VS, b);
+    acc0 = vec_madd(a0, b0, acc0);
+    acc1 = vec_madd(a1, b1, acc1);
+    break;
+
+  case 12:
+    a1 = vec_perm(p1, p2, lvsl_a);
+    b1 = vec_ld(1*VS, b);
+    acc0 = vec_madd(a0, b0, acc0);
+    a2 = vec_perm(p2, p3, lvsl_a);
+    b2 = vec_ld(2*VS, b);
+    acc1 = vec_madd(a1, b1, acc1);
+    acc2 = vec_madd(a2, b2, acc2);
+    break;
+  }
+           
+  acc0 = acc0 + acc1;  // fold the four partial-sum vectors into acc0
+  acc2 = acc2 + acc3;
+  acc0 = acc0 + acc2;
+
+  return horizontal_add_f(acc0);  // presumably sums the lanes of acc0 (gr_altivec.h, not shown) -- confirm
+}
+
+#endif


Property changes on: 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.c
___________________________________________________________________
Name: svn:eol-style
   + native

Added: 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
===================================================================
--- 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
                              (rev 0)
+++ 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
      2008-07-22 06:23:05 UTC (rev 8971)
@@ -0,0 +1,49 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#ifndef INCLUDED_DOTPROD_FFF_ALTIVEC_H
+#define INCLUDED_DOTPROD_FFF_ALTIVEC_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*!
+ * <pre>
+ *  Returns the dot product of a[0..n) and b[0..n).
+ *  preconditions:
+ *
+ *    n > 0 and a multiple of 4 (the implementation rounds other values down)
+ *    a   4-byte aligned
+ *    b  16-byte aligned
+ *
+ * </pre>
+ */
+float 
+dotprod_fff_altivec(const float *a, const float *b, size_t n);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* INCLUDED_DOTPROD_FFF_ALTIVEC_H */


Property changes on: 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/dotprod_fff_altivec.h
___________________________________________________________________
Name: svn:eol-style
   + native

Modified: 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_altivec.cc
===================================================================
--- 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_altivec.cc
      2008-07-22 03:57:22 UTC (rev 8970)
+++ 
gnuradio/branches/developers/eb/vmx/gnuradio-core/src/lib/filter/gr_fir_fff_altivec.cc
      2008-07-22 06:23:05 UTC (rev 8971)
@@ -28,137 +28,8 @@
 #include <assert.h>
 #include <gr_math.h>
 #include <gr_altivec.h>
+#include <dotprod_fff_altivec.h>
 
-extern "C" {
-
-#if 0
-
-float
-dotprod_fff_altivec(const float *a, const float *b, size_t n)
-{
-  float        sum = 0;
-  for (size_t i = 0; i < n; i++){
-    sum += a[i] * b[i];
-  }
-  return sum;
-}
-
-#else
-/*
- *  preconditions:
- *
- *    n > 0 and a multiple of 4
- *    a   4-byte aligned
- *    b  16-byte aligned
- */
-float
-dotprod_fff_altivec(const float *_a, const float *_b, size_t n)
-{
-  const vector float *a = (const vector float *) _a;
-  const vector float *b = (const vector float *) _b;
-
-  static const size_t UNROLL_CNT = 4;
-
-  n = gr_p2_round_down(n, 4);
-  size_t loop_cnt = n / (UNROLL_CNT * FLOATS_PER_VEC);
-  size_t nleft = n % (UNROLL_CNT * FLOATS_PER_VEC);
-
-  // printf("n = %zd, loop_cnt = %zd, nleft = %zd\n", n, loop_cnt, nleft);
-
-  // Used with vperm to build a* from p*
-  vector unsigned char lvsl_a = vec_lvsl(0, _a);
-
-  vector float p0, p1, p2, p3;
-  vector float a0, a1, a2, a3;
-  vector float b0, b1, b2, b3;
-  vector float acc0 = {0, 0, 0, 0};
-  vector float acc1 = {0, 0, 0, 0};
-  vector float acc2 = {0, 0, 0, 0};
-  vector float acc3 = {0, 0, 0, 0};
-
-  // wind in
-
-  register int r0vs = 0 * VS;
-  register int r1vs = 1 * VS;
-  register int r2vs = 2 * VS;
-  register int r3vs = 3 * VS;
-
-  p0 = vec_ld(r0vs, a);
-  p1 = vec_ld(r1vs, a);
-  p2 = vec_ld(r2vs, a);
-  p3 = vec_ld(r3vs, a);
-  a += UNROLL_CNT;
-
-  a0 = vec_perm(p0, p1, lvsl_a);
-  b0 = vec_ld(r0vs, b);
-  p0 = vec_ld(r0vs, a);
-
-  for (size_t i = 0; i < loop_cnt; i++){
-
-    a1 = vec_perm(p1, p2, lvsl_a);
-    b1 = vec_ld(r1vs, b);
-    p1 = vec_ld(r1vs, a);
-    acc0 = vec_madd(a0, b0, acc0);
-
-    a2 = vec_perm(p2, p3, lvsl_a);
-    b2 = vec_ld(r2vs, b);
-    p2 = vec_ld(r2vs, a);
-    acc1 = vec_madd(a1, b1, acc1);
-
-    a3 = vec_perm(p3, p0, lvsl_a);
-    b3 = vec_ld(r3vs, b);
-    p3 = vec_ld(r3vs, a);
-    acc2 = vec_madd(a2, b2, acc2);
-
-    a += UNROLL_CNT;
-    b += UNROLL_CNT;
-
-    a0 = vec_perm(p0, p1, lvsl_a);
-    b0 = vec_ld(r0vs, b);
-    p0 = vec_ld(r0vs, a);
-    acc3 = vec_madd(a3, b3, acc3);
-  }
-
-  /*
-   * The compiler ought to be able to figure out that 0, 4, 8 and 12
-   * are the only possible values for nleft.
-   */
-  switch (nleft){
-  case 0:
-    break;
-    
-  case 4:
-    acc0 = vec_madd(a0, b0, acc0);
-    break;
-
-  case 8:
-    a1 = vec_perm(p1, p2, lvsl_a);
-    b1 = vec_ld(r1vs, b);
-    acc0 = vec_madd(a0, b0, acc0);
-    acc1 = vec_madd(a1, b1, acc1);
-    break;
-
-  case 12:
-    a1 = vec_perm(p1, p2, lvsl_a);
-    b1 = vec_ld(r1vs, b);
-    acc0 = vec_madd(a0, b0, acc0);
-    a2 = vec_perm(p2, p3, lvsl_a);
-    b2 = vec_ld(r2vs, b);
-    acc1 = vec_madd(a1, b1, acc1);
-    acc2 = vec_madd(a2, b2, acc2);
-    break;
-  }
-           
-  acc0 = acc0 + acc1;
-  acc2 = acc2 + acc3;
-  acc0 = acc0 + acc2;
-
-  return horizontal_add_f(acc0);
-}
-
-#endif
-}
-
 gr_fir_fff_altivec::gr_fir_fff_altivec()
   : gr_fir_fff_generic(),
     d_naligned_taps(0), d_aligned_taps(0)





reply via email to

[Prev in Thread] Current Thread [Next in Thread]