[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Libcvd-members] libcvd/cvd_src utility.cc
From: |
Ethan Eade |
Subject: |
[Libcvd-members] libcvd/cvd_src utility.cc |
Date: |
Wed, 17 May 2006 09:26:18 +0000 |
CVSROOT: /cvsroot/libcvd
Module name: libcvd
Branch:
Changes by: Ethan Eade <address@hidden> 06/05/17 09:26:18
Modified files:
cvd_src : utility.cc
Log message:
Some subtle alignment fixes.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/libcvd/libcvd/cvd_src/utility.cc.diff?tr1=1.3&tr2=1.4&r1=text&r2=text
Patches:
Index: libcvd/cvd_src/utility.cc
diff -u libcvd/cvd_src/utility.cc:1.3 libcvd/cvd_src/utility.cc:1.4
--- libcvd/cvd_src/utility.cc:1.3 Tue May 16 14:36:34 2006
+++ libcvd/cvd_src/utility.cc Wed May 17 09:26:18 2006
@@ -17,8 +17,6 @@
using namespace std;
-namespace // anonymous, to hide symbols
-{
using CVD::is_aligned;
using CVD::steps_to_align;
template <class F, class T1, class T2, int A, int M> inline void
maybe_aligned_differences(const T1* a, const T1* b, T2* c, unsigned int count)
@@ -46,7 +44,7 @@
}
}
- template <class F, class T1, class T2, int A, int M> inline void
maybe_aligned_add_mul_add(const T1* a, const T1* b, const T1& c, T2* out,
unsigned int count)
+ template <class F, class T1, class T2, int A, int M> inline void
maybe_aligned_add_mul_add(const T1* a, const T1* b, T1 c, T2* out, unsigned int
count)
{
if (count < M*2) {
F::unaligned_add_mul_add(a,b,c,out,count);
@@ -59,7 +57,7 @@
a += steps;
b += steps;
out += steps;
- if (count < M) {
+ if (count < M || !is_aligned<16>(out)) {
F::unaligned_add_mul_add(a,b,c,out,count);
return;
}
@@ -70,7 +68,7 @@
F::unaligned_add_mul_add(a+block,b+block,c, out+block,count-block);
}
- template <class F, class T1, class T2, int A, int M> inline void
maybe_aligned_assign_mul(const T1* a, const T1& c, T2* out, unsigned int count)
+ template <class F, class T1, class T2, int A, int M> inline void
maybe_aligned_assign_mul(const T1* a, T1 c, T2* out, unsigned int count)
{
if (count < M*2) {
F::unaligned_assign_mul(a,c,out,count);
@@ -89,8 +87,9 @@
}
unsigned int block = (count/M)*M;
F::aligned_assign_mul(a,c,out,block);
- if (count > block)
+ if (count > block) {
F::unaligned_assign_mul(a+block,c, out+block,count-block);
+ }
}
template <class F, class R, class T1, int A, int M> inline R
maybe_aligned_inner_product(const T1* a, const T1* b, unsigned int count)
@@ -139,7 +138,7 @@
return sum;
}
-}
+
namespace CVD {
@@ -205,6 +204,9 @@
template <bool Aligned> inline __m128 load_ps(const void* addr) { return
_mm_loadu_ps((const float*)addr); }
template <> inline __m128 load_ps<true>(const void* addr) { return
_mm_load_ps((const float*)addr); }
+ template <bool Aligned> inline void store_ps(__m128 m, void* addr) {
return _mm_storeu_ps((float*)addr, m); }
+ template <> inline void store_ps<true>(__m128 m, void* addr) { return
_mm_store_ps((float*)addr, m); }
+
template <bool Aligned_b> void float_differences(const __m128* a, const
__m128* b, __m128* diff, unsigned int count)
{
while (count--) {
@@ -212,7 +214,7 @@
}
}
- template <bool Aligned_b> void float_add_multiple_of_sum(const __m128* a,
const __m128* b, const float& c, __m128* out, unsigned int count)
+ template <bool Aligned_b> void float_add_multiple_of_sum(const __m128* a,
const __m128* b, float c, __m128* out, unsigned int count)
{
__m128 cccc = _mm_set1_ps(c);
while (count--) {
@@ -221,13 +223,12 @@
}
}
- void float_assign_multiple(const __m128* a, const float& c, __m128* out,
unsigned int count)
+ template <bool Aligned_out> inline void float_assign_multiple(const
__m128* a, float c, __m128* out, unsigned int count)
{
- __m128 cccc = _mm_set1_ps(c);
- while (count--) {
- *out = _mm_mul_ps(*(a++), cccc);
- ++out;
- }
+ const __m128 cccc = _mm_set1_ps(c);
+ while (count--)
+ store_ps<Aligned_out>(_mm_mul_ps(*(a++), cccc), out++);
+
}
template <bool Aligned_b> double float_inner_product(const __m128* a,
const __m128* b, unsigned int count)
@@ -280,21 +281,24 @@
float_differences<false>((const __m128*)a, (const __m128*)b,
(__m128*)diff, count>>2);
}
- template <class T1, class T2> static inline void
unaligned_add_mul_add(const T1* a, const T1* b, const T1& c, T2* out, size_t
count) {
+ template <class T1, class T2> static inline void
unaligned_add_mul_add(const T1* a, const T1* b, T1 c, T2* out, size_t count) {
add_multiple_of_sum<T1,T2>(a,b,c,out,count);
}
- static inline void aligned_add_mul_add(const float* a, const float* b,
const float& c, float* out, size_t count) {
+ static inline void aligned_add_mul_add(const float* a, const float* b,
float c, float* out, size_t count) {
if (is_aligned<16>(b))
float_add_multiple_of_sum<true>((const __m128*)a, (const
__m128*)b, c, (__m128*)out, count>>2);
else
float_add_multiple_of_sum<false>((const __m128*)a, (const
__m128*)b, c, (__m128*)out, count>>2);
}
- template <class T1, class T2> static inline void
unaligned_assign_mul(const T1* a, const T1& c, T2* out, size_t count) {
+ template <class T1, class T2> static inline void
unaligned_assign_mul(const T1* a, T1 c, T2* out, size_t count) {
assign_multiple<T1,T2>(a,c,out,count);
}
- static inline void aligned_assign_mul(const float* a, const float& c,
float* out, size_t count) {
- float_assign_multiple((const __m128*)a, c, (__m128*)out, count>>2);
+ static inline void aligned_assign_mul(const float* a, float c, float*
out, size_t count) {
+ if (is_aligned<16>(out))
+ float_assign_multiple<false>((const __m128*)a, c, (__m128*)out,
count>>2);
+ else
+ float_assign_multiple<false>((const __m128*)a, c, (__m128*)out,
count>>2);
}
template <class T1> static inline double unaligned_inner_product(const
T1* a, const T1* b, size_t count) {
@@ -327,12 +331,12 @@
maybe_aligned_differences<SSE_funcs, float, float, 16,
4>(a,b,diff,size);
}
- void add_multiple_of_sum(const float* a, const float* b, const float& c,
float* out, unsigned int count)
+ void add_multiple_of_sum(const float* a, const float* b, float c, float*
out, unsigned int count)
{
maybe_aligned_add_mul_add<SSE_funcs,float,float,16,4>(a,b,c,out,count);
}
- void assign_multiple(const float* a, const float& c, float* out, unsigned
int count)
+ void assign_multiple(const float* a, float c, float* out, unsigned int
count)
{
maybe_aligned_assign_mul<SSE_funcs,float,float,16,4>(a,c,out,count);
}
@@ -359,6 +363,8 @@
template <bool Aligned> inline __m128d load_pd(const void* addr) { return
_mm_loadu_pd((const double*)addr); }
template <> inline __m128d load_pd<true>(const void* addr) { return
_mm_load_pd((const double*)addr); }
+ template <bool Aligned> inline void store_pd(__m128d m, void* addr) {
return _mm_storeu_pd((double*)addr, m); }
+ template <> inline void store_pd<true>(__m128d m, void* addr) { return
_mm_store_pd((double*)addr, m); }
template <bool Aligned_b> void int_differences(const __m128i* a, const
__m128i* b, __m128i* diff, unsigned int count)
{
@@ -374,7 +380,7 @@
}
}
- template <bool Aligned_b> void double_add_multiple_of_sum(const __m128d*
a, const __m128d* b, const double& c, __m128d* out, unsigned int count)
+ template <bool Aligned_b> void double_add_multiple_of_sum(const __m128d*
a, const __m128d* b, double c, __m128d* out, unsigned int count)
{
__m128d cc = _mm_set1_pd(c);
while (count--) {
@@ -383,13 +389,11 @@
}
}
- void double_assign_multiple(const __m128d* a, const double& c, __m128d*
out, unsigned int count)
+ template <bool Aligned_out> void double_assign_multiple(const __m128d* a,
double c, __m128d* out, unsigned int count)
{
__m128d cc = _mm_set1_pd(c);
- while (count--) {
- *out = _mm_mul_pd(*(a++), cc);
- ++out;
- }
+ while (count--)
+ store_pd<Aligned_out>(_mm_mul_pd(*(a++), cc), out++);
}
template <bool Aligned_b> double double_inner_product(const __m128d* a,
const __m128d* b, unsigned int count)
{
@@ -477,11 +481,11 @@
double_differences<false>((const __m128d*)a,(const
__m128d*)b,(__m128d*)diff,count>>1);
}
- template <class T1, class T2> static inline void
unaligned_add_mul_add(const T1* a, const T1* b, const T1& c, T2* out, size_t
count) {
+ template <class T1, class T2> static inline void
unaligned_add_mul_add(const T1* a, const T1* b, T1 c, T2* out, size_t count) {
add_multiple_of_sum<T1,T2>(a,b,c,out,count);
}
- static inline void aligned_add_mul_add(const double* a, const double*
b, const double& c, double* out, unsigned int count)
+ static inline void aligned_add_mul_add(const double* a, const double*
b, double c, double* out, unsigned int count)
{
if (is_aligned<16>(b))
double_add_multiple_of_sum<true>((const __m128d*)a, (const
__m128d*)b, c, (__m128d*)out, count>>1);
@@ -489,13 +493,16 @@
double_add_multiple_of_sum<false>((const __m128d*)a, (const
__m128d*)b, c, (__m128d*)out, count>>1);
}
- template <class T1, class T2> static inline void
unaligned_assign_mul(const T1* a, const T1& c, T2* out, size_t count) {
+ template <class T1, class T2> static inline void
unaligned_assign_mul(const T1* a, T1 c, T2* out, size_t count) {
assign_multiple<T1,T2>(a,c,out,count);
}
- static inline void aligned_assign_mul(const double* a, const double& c,
double* out, unsigned int count)
+ static inline void aligned_assign_mul(const double* a, double c,
double* out, unsigned int count)
{
- double_assign_multiple((const __m128d*)a, c, (__m128d*)out,
count>>1);
+ if (is_aligned<16>(out))
+ double_assign_multiple<true>((const __m128d*)a, c,
(__m128d*)out, count>>1);
+ else
+ double_assign_multiple<false>((const __m128d*)a, c,
(__m128d*)out, count>>1);
}
template <class T1> static inline double unaligned_inner_product(const
T1* a, const T1* b, size_t count) {
@@ -545,12 +552,12 @@
maybe_aligned_differences<SSE2_funcs, double, double, 16,
2>(a,b,diff,size);
}
- void add_multiple_of_sum(const double* a, const double* b, const double&
c, double* out, unsigned int count)
+ void add_multiple_of_sum(const double* a, const double* b, double c,
double* out, unsigned int count)
{
maybe_aligned_add_mul_add<SSE2_funcs, double, double, 16,
2>(a,b,c,out,count);
}
- void assign_multiple(const double* a, const double& c, double* out,
unsigned int count)
+ void assign_multiple(const double* a, double c, double* out, unsigned int
count)
{
maybe_aligned_assign_mul<SSE2_funcs, double, double, 16,
2>(a,c,out,count);
}