[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Freepooma-devel] [PATCH] Optimize Vector arithmetic
From: Richard Guenther
Subject: [Freepooma-devel] [PATCH] Optimize Vector arithmetic
Date: Tue, 23 Nov 2004 23:23:17 +0100
User-agent: Mozilla Thunderbird 0.8 (X11/20040926)
Hi!
Apparently due to an oversight (?), Vector arithmetic is severely
pessimized by the explicit creation of temporary Vectors. The attached
patch "fixes" that by removing the casts to Vector<...,Full> from the
operators in VectorOperators.h.
This tests OK, and it even generates the same code as handwritten loops,
but presumably there was a reason the code was written the way it was?
So, please test this on your favorite Vector-using code, and report any
breakage and/or performance changes.
Thanks,
Richard.
Index: VectorOperators.h
===================================================================
RCS file: /cvsroot/freepooma/freepooma/src/Tiny/VectorOperators.h,v
retrieving revision 1.19
diff -u -u -r1.19 VectorOperators.h
--- VectorOperators.h 1 Nov 2004 18:17:11 -0000 1.19
+++ VectorOperators.h 23 Nov 2004 22:19:35 -0000
@@ -83,18 +83,19 @@
template <int D, class T, class E> \
struct UnaryReturn< Vector<D,T,E>, TAG > \
{ \
- typedef Vector< D, typename UnaryReturn<T,TAG>::Type_t, E > Type_t; \
+ typedef Vector< D, typename UnaryReturn<T,TAG>::Type_t, \
+ UnaryVectorOp<Vector<D,T,E>,TAG> > Type_t; \
}; \
\
template <int D, class T, class E> \
-inline typename UnaryReturn< Vector<D,T,E>, TAG >::Type_t \
+inline Vector<D, typename UnaryReturn<T,TAG>::Type_t, \
+ UnaryVectorOp<Vector<D,T,E>, TAG> > \
FUNC( const Vector<D,T,E>& v1 ) \
{ \
typedef Vector<D,T,E> V1; \
typedef typename UnaryReturn<T,TAG>::Type_t T3; \
typedef Vector< D, T3, UnaryVectorOp<V1,TAG> > Expr_t; \
- typedef typename UnaryReturn<V1,TAG>::Type_t Return_t; \
- return Return_t( Expr_t(v1) ); \
+ return Expr_t(v1); \
}
POOMA_VECTOR_UNARY_OPERATOR(acos, FnArcCos)
@@ -121,7 +122,9 @@
//
// Binary operators.
-// Two things need to be done for each binary operator:
+// We build them using the BinaryVectorOp Vector engine to avoid
+// creating temporaries. Two things need to be done for each
+// binary operator:
//
// 1. Define the return type by specializing BinaryReturn.
//
@@ -134,59 +137,62 @@
#define POOMA_VECTOR_BINARY_OPERATOR(FUNC,TAG) \
\
-template <int D, class T1, class T2, class E> \
-struct BinaryReturn< Vector<D,T1,E>, Vector<D,T2,E>, TAG > \
-{ \
- typedef Vector< D, typename BinaryReturn<T1,T2,TAG>::Type_t, E > Type_t; \
-}; \
- \
template <int D, class T1, class T2, class E1, class E2> \
-inline \
-typename BinaryReturn< Vector<D,T1,E1>, Vector<D,T2,E2>, TAG >::Type_t \
-FUNC( const Vector<D,T1,E1>& v1, const Vector<D,T2,E2>& v2 ) \
+struct BinaryReturn< Vector<D,T1,E1>, Vector<D,T2,E2>, TAG > \
{ \
- typedef Vector<D,T1,E1> V1; \
- typedef Vector<D,T2,E2> V2; \
- typedef typename BinaryReturn<V1,V2,TAG>::Type_t Return_t; \
- typedef typename Return_t::Element_t T3; \
- typedef Vector< D, T3, BinaryVectorOp<V1,V2,TAG> > Expr_t; \
- return Return_t( Expr_t(v1,v2) ); \
-} \
+ typedef Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t, \
+ BinaryVectorOp<Vector<D,T1,E1>,Vector<D,T2,E2>,TAG> > Type_t;\
+}; \
\
template <int D, class T1, class T2, class E> \
struct BinaryReturn< Vector<D,T1,E>, T2, TAG > \
{ \
- typedef Vector< D, typename BinaryReturn<T1,T2,TAG>::Type_t, E > Type_t; \
+ typedef Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t, \
+ BinaryVectorOp<Vector<D,T1,E>,T2,TAG> > Type_t; \
}; \
\
template <int D, class T1, class T2, class E> \
struct BinaryReturn< T1, Vector<D,T2,E>, TAG > \
{ \
- typedef Vector< D, typename BinaryReturn<T1,T2,TAG>::Type_t, E > Type_t; \
+ typedef Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t, \
+ BinaryVectorOp<T1,Vector<D,T2,E>,TAG> > Type_t; \
}; \
\
+template <int D, class T1, class T2, class E1, class E2> \
+inline Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t, \
+ BinaryVectorOp<Vector<D,T1,E1>, Vector<D,T2,E2>, TAG> > \
+FUNC( const Vector<D,T1,E1>& v1, const Vector<D,T2,E2>& v2 ) \
+{ \
+ typedef Vector<D,T1,E1> V1; \
+ typedef Vector<D,T2,E2> V2; \
+ typedef typename BinaryReturn<T1,T2,TAG>::Type_t T3; \
+ typedef Vector< D, T3, BinaryVectorOp<V1,V2,TAG> > Expr_t; \
+ return Expr_t(v1,v2); \
+} \
+ \
template <int D, class T1, class E, class T2> \
-inline typename BinaryReturn< Vector<D,T1,E>, T2, TAG >::Type_t \
+inline Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t, \
+ BinaryVectorOp<Vector<D,T1,E>, T2, TAG> > \
FUNC( const Vector<D,T1,E>& v1, const T2& x ) \
{ \
typedef Vector<D,T1,E> V1; \
- typedef typename BinaryReturn<V1,T2,TAG>::Type_t Return_t; \
- typedef typename Return_t::Element_t T3; \
+ typedef typename BinaryReturn<T1,T2,TAG>::Type_t T3; \
typedef Vector< D, T3, BinaryVectorOp<V1,T2,TAG> > Expr_t; \
- return Return_t( Expr_t(v1,x) ); \
+ return Expr_t(v1,x); \
} \
\
template <int D, class T1, class T2, class E> \
-inline typename BinaryReturn< T1, Vector<D,T2,E>, TAG >::Type_t \
+inline Vector<D, typename BinaryReturn<T1,T2,TAG>::Type_t, \
+ BinaryVectorOp<T1, Vector<D,T2,E>, TAG> > \
FUNC( const T1& x, const Vector<D,T2,E>& v2 ) \
{ \
typedef Vector<D,T2,E> V2; \
- typedef typename BinaryReturn<T1,V2,TAG>::Type_t Return_t; \
- typedef typename Return_t::Element_t T3; \
+ typedef typename BinaryReturn<T1,T2,TAG>::Type_t T3; \
typedef Vector< D, T3, BinaryVectorOp<T1,V2,TAG> > Expr_t; \
- return Return_t( Expr_t(x,v2) ); \
+ return Expr_t(x,v2); \
}
+
//-----------------------------------------------------------------------------
POOMA_VECTOR_BINARY_OPERATOR(operator+,OpAdd)
- [Freepooma-devel] [PATCH] Optimize Vector arithmetic,
Richard Guenther <=