freepooma-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Freepooma-devel] [PATCH] Optimize Vector arithmetic


From: Richard Guenther
Subject: [Freepooma-devel] [PATCH] Optimize Vector arithmetic
Date: Tue, 23 Nov 2004 23:23:17 +0100
User-agent: Mozilla Thunderbird 0.8 (X11/20040926)

Hi!

Due to some oversight(??), Vector arithmetic is severely pessimized by the explicit creation of temporary Vectors. The attached patch "fixes" that by removing the casts to Vector<...,Full> from the VectorOperators. This tests OK and even generates the same code as handwritten loops, but surely there was a reason the code was written the way it was?

So, please test this on your favorite Vector-using code and report any breakage and/or performance changes.

Thanks,
Richard.
Index: VectorOperators.h
===================================================================
RCS file: /cvsroot/freepooma/freepooma/src/Tiny/VectorOperators.h,v
retrieving revision 1.19
diff -u -u -r1.19 VectorOperators.h
--- VectorOperators.h   1 Nov 2004 18:17:11 -0000       1.19
+++ VectorOperators.h   23 Nov 2004 22:19:35 -0000
@@ -83,18 +83,19 @@
 template <int D, class T, class E>                                            \
 struct UnaryReturn< Vector<D,T,E>, TAG >                                      \
 {                                                                             \
-  typedef Vector< D, typename UnaryReturn<T,TAG>::Type_t, E > Type_t;         \
+  typedef Vector< D, typename UnaryReturn<T,TAG>::Type_t,                     \
+                  UnaryVectorOp<Vector<D,T,E>,TAG> > Type_t;                  \
 };                                                                            \
                                                                               \
 template <int D, class T, class E>                                            \
-inline typename UnaryReturn< Vector<D,T,E>, TAG >::Type_t                     \
+inline Vector<D, typename UnaryReturn<T,TAG>::Type_t,                         \
+              UnaryVectorOp<Vector<D,T,E>, TAG> >                             \
 FUNC( const Vector<D,T,E>& v1 )                                               \
 {                                                                             \
   typedef Vector<D,T,E> V1;                                                   \
   typedef typename UnaryReturn<T,TAG>::Type_t T3;                             \
   typedef Vector< D, T3, UnaryVectorOp<V1,TAG> > Expr_t;                      \
-  typedef typename UnaryReturn<V1,TAG>::Type_t Return_t;                      \
-  return Return_t( Expr_t(v1) );                                              \
+  return Expr_t(v1);                                                          \
 }
 
 POOMA_VECTOR_UNARY_OPERATOR(acos, FnArcCos)
@@ -121,7 +122,9 @@
 
 //
 // Binary operators.
-// Two things need to be done for each binary operator:
+// We build them using the BinaryVectorOp Vector engine to avoid
+// creating temporaries. Two things need to be done for each
+// binary operator:
 //
 // 1. Define the return type by specializing BinaryReturn.
 //
@@ -134,59 +137,62 @@
 
 #define POOMA_VECTOR_BINARY_OPERATOR(FUNC,TAG)                                \
                                                                               \
-template <int D, class T1, class T2, class E>                                 \
-struct BinaryReturn< Vector<D,T1,E>, Vector<D,T2,E>, TAG >                    \
-{                                                                             \
-  typedef Vector< D, typename BinaryReturn<T1,T2,TAG>::Type_t, E > Type_t;    \
-};                                                                            \
-                                                                              \
 template <int D, class T1, class T2, class E1, class E2>                      \
-inline                                                                        \
-typename BinaryReturn< Vector<D,T1,E1>, Vector<D,T2,E2>, TAG >::Type_t        \
-FUNC( const Vector<D,T1,E1>& v1, const Vector<D,T2,E2>& v2 )                  \
+struct BinaryReturn< Vector<D,T1,E1>, Vector<D,T2,E2>, TAG >                  \
 {                                                                             \
-  typedef Vector<D,T1,E1> V1;                                                 \
-  typedef Vector<D,T2,E2> V2;                                                 \
-  typedef typename BinaryReturn<V1,V2,TAG>::Type_t Return_t;                  \
-  typedef typename Return_t::Element_t T3;                                    \
-  typedef Vector< D, T3, BinaryVectorOp<V1,V2,TAG> > Expr_t;                  \
-  return Return_t( Expr_t(v1,v2) );                                           \
-}                                                                             \
+  typedef Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t,                 \
+                 BinaryVectorOp<Vector<D,T1,E1>,Vector<D,T2,E2>,TAG> > Type_t;\
+};                                                                            \
                                                                               \
 template <int D, class T1, class T2, class E>                                 \
 struct BinaryReturn< Vector<D,T1,E>, T2, TAG >                                \
 {                                                                             \
-  typedef Vector< D, typename BinaryReturn<T1,T2,TAG>::Type_t, E > Type_t;    \
+  typedef Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t,                 \
+                 BinaryVectorOp<Vector<D,T1,E>,T2,TAG> > Type_t;              \
 };                                                                            \
                                                                               \
 template <int D, class T1, class T2, class E>                                 \
 struct BinaryReturn< T1, Vector<D,T2,E>, TAG >                                \
 {                                                                             \
-  typedef Vector< D, typename BinaryReturn<T1,T2,TAG>::Type_t, E > Type_t;    \
+  typedef Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t,                 \
+                 BinaryVectorOp<T1,Vector<D,T2,E>,TAG> > Type_t;              \
 };                                                                            \
                                                                               \
+template <int D, class T1, class T2, class E1, class E2>                      \
+inline Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t,                    \
+              BinaryVectorOp<Vector<D,T1,E1>, Vector<D,T2,E2>, TAG> >         \
+FUNC( const Vector<D,T1,E1>& v1, const Vector<D,T2,E2>& v2 )                  \
+{                                                                             \
+  typedef Vector<D,T1,E1> V1;                                                 \
+  typedef Vector<D,T2,E2> V2;                                                 \
+  typedef typename BinaryReturn<T1,T2,TAG>::Type_t T3;                        \
+  typedef Vector< D, T3, BinaryVectorOp<V1,V2,TAG> > Expr_t;                  \
+  return Expr_t(v1,v2);                                                       \
+}                                                                             \
+                                                                              \
 template <int D, class T1, class E, class T2>                                 \
-inline typename BinaryReturn< Vector<D,T1,E>, T2, TAG >::Type_t               \
+inline Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t,                    \
+              BinaryVectorOp<Vector<D,T1,E>, T2, TAG> >                       \
 FUNC( const Vector<D,T1,E>& v1, const T2& x )                                 \
 {                                                                             \
   typedef Vector<D,T1,E> V1;                                                  \
-  typedef typename BinaryReturn<V1,T2,TAG>::Type_t Return_t;                  \
-  typedef typename Return_t::Element_t T3;                                    \
+  typedef typename BinaryReturn<T1,T2,TAG>::Type_t T3;                        \
   typedef Vector< D, T3, BinaryVectorOp<V1,T2,TAG> > Expr_t;                  \
-  return Return_t( Expr_t(v1,x) );                                            \
+  return Expr_t(v1,x);                                                        \
 }                                                                             \
                                                                               \
 template <int D, class T1, class T2, class E>                                 \
-inline typename BinaryReturn< T1, Vector<D,T2,E>, TAG >::Type_t               \
+inline Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t,                    \
+              BinaryVectorOp<T1, Vector<D,T2,E>, TAG> >                       \
 FUNC( const T1& x, const Vector<D,T2,E>& v2 )                                 \
 {                                                                             \
   typedef Vector<D,T2,E> V2;                                                  \
-  typedef typename BinaryReturn<T1,V2,TAG>::Type_t Return_t;                  \
-  typedef typename Return_t::Element_t T3;                                    \
+  typedef typename BinaryReturn<T1,T2,TAG>::Type_t T3;                        \
   typedef Vector< D, T3, BinaryVectorOp<T1,V2,TAG> > Expr_t;                  \
-  return Return_t( Expr_t(x,v2) );                                            \
+  return Expr_t(x,v2);                                                        \
 }
 
+
 //-----------------------------------------------------------------------------
 
 POOMA_VECTOR_BINARY_OPERATOR(operator+,OpAdd)

reply via email to

[Prev in Thread] Current Thread [Next in Thread]