libcvd-members
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[libcvd-members] libcvd Makefile.in configure.in cvd_src/fast_co...


From: Edward Rosten
Subject: [libcvd-members] libcvd Makefile.in configure.in cvd_src/fast_co...
Date: Sat, 11 Aug 2007 06:07:35 +0000

CVSROOT:        /cvsroot/libcvd
Module name:    libcvd
Changes by:     Edward Rosten <edrosten>        07/08/11 06:07:35

Modified files:
        .              : Makefile.in configure.in 
        cvd_src        : fast_corner.cxx 
Added files:
        cvd_src        : faster_corner_10.cxx faster_corner_12.cxx 
                         faster_corner_9.cxx faster_corner_utilities.h 
                         slower_corner_10.cxx slower_corner_12.cxx 
                         slower_corner_9.cxx 
Removed files:
        cvd_src        : faster_corner.cxx slower_corner.cxx 

Log message:
        Maybe fixed bug in faster_corner_9 which was previously in 
faster_corner_10.
        
        Also, rearranged code a bit to make build system nicer.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/libcvd/Makefile.in?cvsroot=libcvd&r1=1.55&r2=1.56
http://cvs.savannah.gnu.org/viewcvs/libcvd/configure.in?cvsroot=libcvd&r1=1.93&r2=1.94
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/fast_corner.cxx?cvsroot=libcvd&r1=1.15&r2=1.16
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/faster_corner_10.cxx?cvsroot=libcvd&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/faster_corner_12.cxx?cvsroot=libcvd&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/faster_corner_9.cxx?cvsroot=libcvd&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/faster_corner_utilities.h?cvsroot=libcvd&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/slower_corner_10.cxx?cvsroot=libcvd&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/slower_corner_12.cxx?cvsroot=libcvd&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/slower_corner_9.cxx?cvsroot=libcvd&rev=1.1
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/faster_corner.cxx?cvsroot=libcvd&r1=1.8&r2=0
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/slower_corner.cxx?cvsroot=libcvd&r1=1.2&r2=0

Patches:
Index: Makefile.in
===================================================================
RCS file: /cvsroot/libcvd/libcvd/Makefile.in,v
retrieving revision 1.55
retrieving revision 1.56
diff -u -b -r1.55 -r1.56
--- Makefile.in 8 Aug 2007 20:47:26 -0000       1.55
+++ Makefile.in 11 Aug 2007 06:07:34 -0000      1.56
@@ -117,22 +117,27 @@
        CVD_OBJS+=cvd_src/fast/fast_7_detect.o                    \
                          cvd_src/fast/fast_7_score.o                     
 endif
+
 ifeq (@have_FAST_8@,yes)
        CVD_OBJS+=cvd_src/fast/fast_8_detect.o                    \
                          cvd_src/fast/fast_8_score.o                     
 endif
+
 ifeq (@have_FAST_9@,yes)
        CVD_OBJS+=cvd_src/fast/fast_9_detect.o                    \
                          cvd_src/fast/fast_9_score.o                     
 endif
+
 ifeq (@have_FAST_10@,yes)
        CVD_OBJS+=cvd_src/fast/fast_10_detect.o                    \
                          cvd_src/fast/fast_10_score.o                     
 endif
+
 ifeq (@have_FAST_11@,yes)
        CVD_OBJS+=cvd_src/fast/fast_11_detect.o                    \
                          cvd_src/fast/fast_11_score.o                     
 endif
+
 ifeq (@have_FAST_12@,yes)
        CVD_OBJS+=cvd_src/fast/fast_12_detect.o                    \
                          cvd_src/fast/fast_12_score.o                     
@@ -247,13 +252,27 @@
 endif
 
 
-ifeq (@have_sse2@,yes)
-       CVD_OBJS+=cvd_src/faster_corner.o
+ifeq (@have_sse2@@have_FAST_9@,yesyes)
+       CVD_OBJS+=cvd_src/faster_corner_9.o
 else
-       CVD_OBJS+=cvd_src/slower_corner.o
+       CVD_OBJS+=cvd_src/slower_corner_9.o
 endif
 
 
+ifeq (@have_sse2@@have_FAST_10@,yesyes)
+       CVD_OBJS+=cvd_src/faster_corner_10.o
+else
+       CVD_OBJS+=cvd_src/slower_corner_10.o
+endif
+
+ifeq (@have_sse2@@have_FAST_12@,yesyes)
+       CVD_OBJS+=cvd_src/faster_corner_12.o
+else
+       CVD_OBJS+=cvd_src/slower_corner_12.o
+endif
+
+
+
 
################################################################################
 #
 #

Index: configure.in
===================================================================
RCS file: /cvsroot/libcvd/libcvd/configure.in,v
retrieving revision 1.93
retrieving revision 1.94
diff -u -b -r1.93 -r1.94
--- configure.in        11 Aug 2007 05:25:42 -0000      1.93
+++ configure.in        11 Aug 2007 06:07:34 -0000      1.94
@@ -1135,6 +1135,7 @@
 then
        AC_SUBST(have_FAST_7, yes) 
        AC_DEFINE(CVD_HAVE_FAST_7)
+       AC_SUBST(have_fast_7)
 fi
 if echo $fasts | grep -q 8 
 then

Index: cvd_src/fast_corner.cxx
===================================================================
RCS file: /cvsroot/libcvd/libcvd/cvd_src/fast_corner.cxx,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -b -r1.15 -r1.16
--- cvd_src/fast_corner.cxx     25 Jul 2007 20:52:25 -0000      1.15
+++ cvd_src/fast_corner.cxx     11 Aug 2007 06:07:34 -0000      1.16
@@ -96,6 +96,7 @@
 }
 
 
+#ifdef CVD_HAVE_FAST_9
 void fast_corner_detect_9_nonmax(const BasicImage<byte>& I, 
std::vector<ImageRef>& corners, int barrier)
 {
        vector<ImageRef> c;
@@ -104,6 +105,7 @@
        fast_corner_score_9(I, c, barrier, s);
        nonmax_suppression(c, s, corners);
 }
+#endif
 
 #ifdef CVD_HAVE_FAST_7
 void fast_corner_detect_7(const BasicImage<byte>& I, std::vector<ImageRef>& 
corners, int barrier)

Index: cvd_src/faster_corner_10.cxx
===================================================================
RCS file: cvd_src/faster_corner_10.cxx
diff -N cvd_src/faster_corner_10.cxx
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ cvd_src/faster_corner_10.cxx        11 Aug 2007 06:07:34 -0000      1.1
@@ -0,0 +1,200 @@
+#include <cvd/fast_corner.h>
+
+#include <vector>
+#include <list>
+#include <cvd/utility.h>
+using namespace CVD;
+using namespace std;
+#include <emmintrin.h>
+
+#include "cvd_src/fast/prototypes.h"
+#include "cvd_src/faster_corner_utilities.h"
+namespace CVD
+{
+    #include "cvd_src/corner_10.h"    
+
+    template <bool Aligned> void faster_corner_detect_10(const 
BasicImage<byte>& I, std::vector<ImageRef>& corners, const int barrier)
+    {
+       const int w = I.size().x;
+       const int stride = 3*w;
+ 
+       // The compiler refuses to reserve a register for this
+       register const __m128i barriers = _mm_set1_epi8((byte)barrier);
+       const byte* const end = I[I.size().y - 3]-16;
+
+       int xend = I.size().x - 3;
+       xend -= I.size().x % 16;
+
+       for(int y=3; y < I.size().y - 3; y++)
+       {
+           for(int x=3; x < 16; x++)
+               if(is_corner_10<Less>(&I[y][x], I.row_stride(), barrier) || 
is_corner_10<Greater>(&I[y][x], I.row_stride(), barrier))
+                   corners.push_back(ImageRef(x, y));
+           
+           for(int x=16; x < xend; x++)
+           {
+               const byte* p = &I[y][x];
+               __m128i lo, hi;
+               {
+                   const __m128i here = load_si128<Aligned>((const 
__m128i*)(p));
+                   lo = _mm_subs_epu8(here, barriers);
+                   hi = _mm_adds_epu8(barriers, here);
+               }
+               unsigned int ans_b, ans_e;
+               {
+                   __m128i top = load_si128<Aligned>((const 
__m128i*)(p-stride));
+                   __m128i bottom = load_si128<Aligned>((const 
__m128i*)(p+stride));
+
+                   CHECK_BARRIER(lo, hi, top, ans_b);
+                   CHECK_BARRIER(lo, hi, bottom, ans_e);
+                   if (!(ans_b | ans_e))
+                       continue;        
+               }
+
+               unsigned int ans_m, ans_p, possible;
+               {
+                   __m128i ul = _mm_loadu_si128((const __m128i*)(p-2-2*w));
+                   __m128i lr = _mm_loadu_si128((const __m128i*)(p+2+2*w));
+                   CHECK_BARRIER(lo, hi, ul, ans_m);
+                   CHECK_BARRIER(lo, hi, lr, ans_p);
+                   possible = (ans_m & ans_b) | (ans_e & ans_p);
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_o, ans_n;
+               {
+                   __m128i ll = _mm_loadu_si128((const __m128i*)(p-2+2*w));
+                   __m128i ur = _mm_loadu_si128((const __m128i*)(p+2-2*w));
+                   CHECK_BARRIER(lo, hi, ll, ans_o);
+                   CHECK_BARRIER(lo, hi, ur, ans_n);
+                   possible &= ans_o | (ans_b & ans_n);
+                   possible &= ans_n | (ans_e & ans_o);
+                   if (!possible)
+                       continue;
+               }
+     
+               unsigned int ans_h, ans_k;
+               {
+                   __m128i left = _mm_loadu_si128((const __m128i*)(p-3));
+                   __m128i right = _mm_loadu_si128((const __m128i*)(p+3));
+                   CHECK_BARRIER(lo, hi, left, ans_h);
+                   CHECK_BARRIER(lo, hi, right, ans_k);
+                   possible &= ans_h | (ans_n & ans_k & ans_p);
+                   possible &= ans_k | (ans_m & ans_h & ans_o);
+                   if (!possible)
+                       continue;
+               }
+               
+               unsigned int ans_a, ans_c;
+               {
+                   __m128i a = _mm_loadu_si128((const __m128i*)(p-1-stride));
+                   __m128i c = _mm_insert_epi16(_mm_srli_si128(a,2), *(const 
unsigned short*)(p+15-stride), 7);
+                   //__m128i c = _mm_loadu_si128((const __m128i*)(p+1-stride));
+                   CHECK_BARRIER(lo, hi, a, ans_a);
+                   CHECK_BARRIER(lo, hi, c, ans_c);
+                   possible &= ans_a | (ans_e & ans_p);
+                   possible &= ans_c | (ans_o & ans_e);
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_d, ans_f;
+               {
+                   __m128i d = _mm_loadu_si128((const __m128i*)(p-1+stride));
+                   __m128i f = _mm_insert_epi16(_mm_srli_si128(d,2), *(const 
unsigned short*)(p+15+stride), 7);
+                   //__m128i f = _mm_loadu_si128((const __m128i*)(p+1+stride));
+                   CHECK_BARRIER(lo, hi, d, ans_d);
+                   CHECK_BARRIER(lo, hi, f, ans_f);
+                   const unsigned int ans_abc = ans_a & ans_b & ans_c;
+                   possible &= ans_d | (ans_abc & ans_n);
+                   possible &= ans_f | (ans_m & ans_abc);
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_g, ans_i;
+               {
+                   __m128i g = _mm_loadu_si128((const __m128i*)(p-3-w));
+                   __m128i ii = _mm_loadu_si128((const __m128i*)(p-3+w));
+                   CHECK_BARRIER(lo, hi, g, ans_g);
+                   CHECK_BARRIER(lo, hi, ii, ans_i);
+                   possible &= ans_g | (ans_f & ans_p & ans_k);
+                   possible &= ans_i | (ans_c & ans_n & ans_k);
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_j, ans_l;
+               {
+                   __m128i jj = _mm_loadu_si128((const __m128i*)(p+3-w));
+                   __m128i l = _mm_loadu_si128((const __m128i*)(p+3+w));
+                   CHECK_BARRIER(lo, hi, jj, ans_j);
+                   CHECK_BARRIER(lo, hi, l, ans_l);
+                   const unsigned int ans_ghi = ans_g & ans_h & ans_i;
+                   possible &= ans_j | (ans_d & ans_o & ans_ghi);
+                   possible &= ans_l | (ans_m & ans_a & ans_ghi);
+                   if (!possible)
+                       continue;
+               }
+
+               //if(possible & 0x0f) //Does this make it faster?
+               {
+                   if(possible & (1<< 0))
+                           corners.push_back(ImageRef(y, x + 0));
+                   if(possible & (1<< 1))
+                           corners.push_back(ImageRef(y, x + 1));
+                   if(possible & (1<< 2))
+                           corners.push_back(ImageRef(y, x + 2));
+                   if(possible & (1<< 3))
+                           corners.push_back(ImageRef(y, x + 3));
+                   if(possible & (1<< 4))
+                           corners.push_back(ImageRef(y, x + 4));
+                   if(possible & (1<< 5))
+                           corners.push_back(ImageRef(y, x + 5));
+                   if(possible & (1<< 6))
+                           corners.push_back(ImageRef(y, x + 6));
+                   if(possible & (1<< 7))
+                           corners.push_back(ImageRef(y, x + 7));
+               }
+               //if(possible & 0xf0) //Does this make it faster?
+               {
+                   if(possible & (1<< 8))
+                           corners.push_back(ImageRef(y, x + 8));
+                   if(possible & (1<< 9))
+                           corners.push_back(ImageRef(y, x + 9));
+                   if(possible & (1<<10))
+                           corners.push_back(ImageRef(y, x +10));
+                   if(possible & (1<<11))
+                           corners.push_back(ImageRef(y, x +11));
+                   if(possible & (1<<12))
+                           corners.push_back(ImageRef(y, x +12));
+                   if(possible & (1<<13))
+                           corners.push_back(ImageRef(y, x +13));
+                   if(possible & (1<<14))
+                           corners.push_back(ImageRef(y, x +14));
+                   if(possible & (1<<15))
+                           corners.push_back(ImageRef(y, x +15));
+               }
+           }
+
+           for(int x=xend; x < I.size().x - 3; x++)
+               if(is_corner_10<Less>(&I[y][x], I.row_stride(), barrier) || 
is_corner_10<Greater>(&I[y][x], I.row_stride(), barrier))
+                   corners.push_back(ImageRef(x, y));
+       }
+    }
+
+    void fast_corner_detect_10(const BasicImage<byte>& I, 
std::vector<ImageRef>& corners, int barrier)
+    {
+       if (I.size().x < 22) {
+           fast_corner_detect_plain_10(I,corners,barrier);
+           return;
+       } else if (I.size().x < 22 || I.size().y < 7)
+           return;
+
+       if (is_aligned<16>(I[0]) && is_aligned<16>(I[1]))
+           faster_corner_detect_10<true>(I, corners, barrier);
+       else
+           faster_corner_detect_10<false>(I, corners, barrier);
+    }
+}

Index: cvd_src/faster_corner_12.cxx
===================================================================
RCS file: cvd_src/faster_corner_12.cxx
diff -N cvd_src/faster_corner_12.cxx
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ cvd_src/faster_corner_12.cxx        11 Aug 2007 06:07:34 -0000      1.1
@@ -0,0 +1,127 @@
+#include <cvd/fast_corner.h>
+
+#include <vector>
+#include <list>
+#include <cvd/utility.h>
+using namespace CVD;
+using namespace std;
+#include <emmintrin.h>
+
+#include "cvd_src/fast/prototypes.h"
+#include "cvd_src/faster_corner_utilities.h"
+namespace CVD
+{
+    #include "cvd_src/corner_12.h"    
+
+    template <int I, int N> struct BitCheck {
+       template <class C> static inline void eval(unsigned int three, const 
byte* p, const int w, const int barrier, C& corners) {
+           const int BIT = 1<<I;
+           if (three & BIT) {
+               if (three & (BIT << 16)) {
+                   if (is_corner_12<Greater>(p, w, *p+barrier)) 
+                       corners.push_back(p);
+               } else {
+                   if (is_corner_12<Less>(p, w, *p-barrier))
+                       corners.push_back(p);
+               }
+           }
+           BitCheck<I+1,N>::eval(three, p+1, w, barrier, corners);
+       }
+    };
+
+    template <int N> struct BitCheck<N,N> { 
+       template <class C> static inline void eval(unsigned int three, const 
byte* p, const int w, const int barrier, C& corners) {} 
+    };
+
+    template <int CHUNKS, class C> inline void process_16(unsigned int three, 
const byte* p, const int w, const int barrier, C& corners)
+    {    
+       three |= (three >> 16);
+       const int BITS = 16/CHUNKS;
+       const int mask = ((1<<BITS)-1);
+       for (int i=0; i<CHUNKS; ++i) {
+           if (three & mask)
+               BitCheck<0,BITS>::eval(three, p, w, barrier, corners);
+           p += BITS;
+           three >>= BITS;
+       }
+    }
+
+    template <bool Aligned> void faster_corner_detect_12(const 
BasicImage<byte>& I, std::vector<ImageRef>& corners, int barrier)
+    {
+       const int w = I.size().x;
+       const int stride = 3*w;
+       typedef std::list<const byte*> Passed;
+       Passed passed;
+    
+       // The compiler refuses to reserve a register for this,
+       // even though xmm6 and xmm7 go unused.
+       // It loads it from memory each time.  I am stymied.
+       register const __m128i barriers = _mm_set1_epi8((byte)barrier);
+
+       for (int i=3; i<I.size().y-3; ++i) {
+           const byte* p = I[i];
+           for (int j=0; j<w/16; ++j, p+=16) {
+               __m128i lo, hi;
+               {
+                   const __m128i here = load_si128<Aligned>((const 
__m128i*)(p));
+                   lo = _mm_subs_epu8(here, barriers);
+                   hi = _mm_adds_epu8(barriers, here);
+               }
+               const __m128i above = load_si128<Aligned>((const 
__m128i*)(p-stride));
+               const __m128i below = load_si128<Aligned>((const 
__m128i*)(p+stride));
+               unsigned int up_flags, down_flags;
+               CHECK_BARRIER(lo, hi, above, up_flags);
+               CHECK_BARRIER(lo, hi, below, down_flags);
+               const unsigned int either_ud = up_flags | down_flags;
+               if (either_ud) {
+                   unsigned int left_flags;
+                   {
+                       const __m128i other = _mm_loadu_si128((const 
__m128i*)(p-3));
+                       CHECK_BARRIER(lo, hi, other, left_flags);
+                   }
+                   const unsigned int both_ud = up_flags & down_flags;
+                   if (both_ud | (either_ud&left_flags)) {
+                       unsigned int right_flags;
+                       {
+                           const __m128i other = _mm_loadu_si128((const 
__m128i*)(p+3));
+                           CHECK_BARRIER(lo, hi, other, right_flags);
+                       }
+                       const unsigned int at_least_three = (either_ud & 
(left_flags & right_flags)) | (both_ud & (left_flags | right_flags));
+                       if (at_least_three) {
+                           process_16<4>(at_least_three, p, w, barrier, 
passed);
+                       }
+                   }
+               }
+           }
+           passed.push_back(0);
+       }
+       corners.reserve(passed.size());
+       int row = 3;
+       const byte* row_start = I[3];
+       for (Passed::iterator it = passed.begin(); it != passed.end(); ++it) {
+           if (*it == 0) {
+               row_start=I[++row];
+               continue;
+           }
+           int x = *it - row_start;
+           if (x > 2 && x < w-3)
+               corners.push_back(ImageRef(x, row));
+       }
+    }
+
+
+    void fast_corner_detect_12(const BasicImage<byte>& I, 
std::vector<ImageRef>& corners, int barrier)
+    {
+       if (I.size().x < 22) {
+           fast_corner_detect_plain_12(I,corners,barrier);
+           return;
+       } else if (I.size().x < 22 || I.size().y < 7)
+           return;
+
+       if (is_aligned<16>(I[0]) && is_aligned<16>(I[1]))
+           faster_corner_detect_12<true>(I, corners, barrier);
+       else
+           faster_corner_detect_12<false>(I, corners, barrier);
+    }
+}
+

Index: cvd_src/faster_corner_9.cxx
===================================================================
RCS file: cvd_src/faster_corner_9.cxx
diff -N cvd_src/faster_corner_9.cxx
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ cvd_src/faster_corner_9.cxx 11 Aug 2007 06:07:34 -0000      1.1
@@ -0,0 +1,257 @@
+#include <cvd/fast_corner.h>
+
+#include <vector>
+#include <list>
+#include <cvd/utility.h>
+using namespace CVD;
+using namespace std;
+
+#include <emmintrin.h>
+
+#include "cvd_src/fast/prototypes.h"
+#include "cvd_src/faster_corner_utilities.h"
+
+namespace CVD
+{
+    #include "cvd_src/corner_9.h"    
+
+    template <bool Aligned> void faster_corner_detect_9(const 
BasicImage<byte>& I, std::vector<ImageRef>& corners, const int barrier)
+    {
+       const int w = I.size().x;
+       const int stride = 3*w;
+ 
+       // The compiler refuses to reserve a register for this
+       register const __m128i barriers = _mm_set1_epi8((byte)barrier);
+       const byte* const end = I[I.size().y - 3]-16;
+
+       int xend = I.size().x - 3;
+       xend -= I.size().x % 16;
+
+       for(int y=3; y < I.size().y - 3; y++)
+       {
+           for(int x=3; x < 16; x++)
+               if(is_corner_9<Less>(&I[y][x], I.row_stride(), barrier) || 
is_corner_9<Greater>(&I[y][x], I.row_stride(), barrier))
+                   corners.push_back(ImageRef(x, y));
+           
+           for(int x=16; x < xend; x++)
+           {
+               const byte* p = &I[y][x];
+               __m128i lo, hi;
+               {
+                   const __m128i here = load_si128<Aligned>((const 
__m128i*)(p));
+                   lo = _mm_subs_epu8(here, barriers);
+                   hi = _mm_adds_epu8(barriers, here);
+               }
+               unsigned int ans_0, ans_8, possible;
+               {
+                   __m128i top = load_si128<Aligned>((const 
__m128i*)(p-stride));
+                   __m128i bottom = load_si128<Aligned>((const 
__m128i*)(p+stride));
+
+                   CHECK_BARRIER(lo, hi, top, ans_0);
+                   CHECK_BARRIER(lo, hi, bottom, ans_8);
+                   possible = ans_0 | ans_8;
+                   if (!possible)
+                       continue;        
+               }
+
+               unsigned int ans_15, ans_1;
+               {
+                   __m128i a = _mm_loadu_si128((const __m128i*)(p-1-stride));
+                   __m128i c = _mm_insert_epi16(_mm_srli_si128(a,2), *(const 
unsigned short*)(p+15-stride), 7);
+                   CHECK_BARRIER(lo, hi, a, ans_15);
+                   CHECK_BARRIER(lo, hi, c, ans_1);
+                   possible &= ans_8 | (ans_15 & ans_1);
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_9, ans_7;
+               {
+                   __m128i d = _mm_loadu_si128((const __m128i*)(p-1+stride));
+                   __m128i f = _mm_insert_epi16(_mm_srli_si128(d,2), *(const 
unsigned short*)(p+15+stride), 7);
+                   CHECK_BARRIER(lo, hi, d, ans_9);
+                   CHECK_BARRIER(lo, hi, f, ans_7);
+                   possible &= ans_9 | (ans_0 & ans_1);
+                   possible &= ans_7 | (ans_15 & ans_0);
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_12, ans_4;
+               {
+                   __m128i left = _mm_loadu_si128((const __m128i*)(p-3));
+                   __m128i right = _mm_loadu_si128((const __m128i*)(p+3));
+                   CHECK_BARRIER(lo, hi, left, ans_12);
+                   CHECK_BARRIER(lo, hi, right, ans_4);
+                   possible &= ans_12 | (ans_4 & (ans_1 | ans_7));
+                   possible &= ans_4 | (ans_12 & (ans_9 | ans_15));
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_14, ans_6;
+               {
+                   __m128i ul = _mm_loadu_si128((const __m128i*)(p-2-2*w));
+                   __m128i lr = _mm_loadu_si128((const __m128i*)(p+2+2*w));
+                   CHECK_BARRIER(lo, hi, ul, ans_14);
+                   CHECK_BARRIER(lo, hi, lr, ans_6);
+                   {
+                       const unsigned int ans_6_7 = ans_6 & ans_7;
+                       possible &= ans_14 | (ans_6_7 & (ans_4 | (ans_8 & 
ans_9)));
+                       possible &= ans_1 | (ans_6_7) | ans_12;
+                   }
+                   {
+                       const unsigned int ans_14_15 = ans_14 & ans_15;
+                       possible &= ans_6 | (ans_14_15 & (ans_12 | (ans_0 & 
ans_1)));
+                       possible &= ans_9 | (ans_14_15) | ans_4;
+                   }
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_10, ans_2;
+               {
+                   __m128i ll = _mm_loadu_si128((const __m128i*)(p-2+2*w));
+                   __m128i ur = _mm_loadu_si128((const __m128i*)(p+2-2*w));
+                   CHECK_BARRIER(lo, hi, ll, ans_10);
+                   CHECK_BARRIER(lo, hi, ur, ans_2);
+                   {
+                       const unsigned int ans_1_2 = ans_1 & ans_2;
+                       possible &= ans_10 | (ans_1_2 & ((ans_0 & ans_15) | 
ans_4));
+                       possible &= ans_12 | (ans_1_2) | (ans_6 & ans_7);
+                   }
+                   {
+                       const unsigned int ans_9_10 = ans_9 & ans_10;
+                       possible &= ans_2 | (ans_9_10 & ((ans_7 & ans_8) | 
ans_12));
+                       possible &= ans_4 | (ans_9_10) | (ans_14 & ans_15);
+                   }
+                   possible &= ans_8 | ans_14 | ans_2;
+                   possible &= ans_0 | ans_10 | ans_6;
+                   if (!possible)
+                       continue;
+               }
+
+               unsigned int ans_13, ans_5;
+               {
+                   __m128i g = _mm_loadu_si128((const __m128i*)(p-3-w));
+                   __m128i l = _mm_loadu_si128((const __m128i*)(p+3+w));
+                   CHECK_BARRIER(lo, hi, g, ans_13);
+                   CHECK_BARRIER(lo, hi, l, ans_5);
+                   const unsigned int ans_15_0 = ans_15 & ans_0; 
+                   const unsigned int ans_7_8 = ans_7 & ans_8;
+                   {
+                       const unsigned int ans_12_13 = ans_12 & ans_13;
+                       possible &= ans_5 | (ans_12_13 & ans_14 & ((ans_15_0) | 
ans_10));
+                       possible &= ans_7 | (ans_1 & ans_2) | (ans_12_13);
+                       possible &= ans_2 | (ans_12_13) | (ans_7_8);
+                   }
+                   {
+                       const unsigned int ans_4_5 = ans_4 & ans_5;
+                       const unsigned int ans_9_10 = ans_9 & ans_10;
+                       possible &= ans_13 | (ans_4_5 & ans_6 & ((ans_7_8) | 
ans_2));
+                       possible &= ans_15 | (ans_4_5) | (ans_9_10);
+                       possible &= ans_10 | (ans_4_5) | (ans_15_0);
+                       possible &= ans_15 | (ans_9_10) | (ans_4_5);
+                   }
+
+                   possible &= ans_8 | (ans_13 & ans_14) | ans_2;
+                   possible &= ans_0 | (ans_5 & ans_6) | ans_10;
+                   if (!possible)
+                       continue;
+               }
+
+
+               unsigned int ans_11, ans_3;
+               {
+                   __m128i ii = _mm_loadu_si128((const __m128i*)(p-3+w));
+                   __m128i jj = _mm_loadu_si128((const __m128i*)(p+3-w));
+                   CHECK_BARRIER(lo, hi, ii, ans_11);
+                   CHECK_BARRIER(lo, hi, jj, ans_3);
+                   {
+                       const unsigned int ans_2_3 = ans_2 & ans_3;
+                       possible &= ans_11 | (ans_2_3 & ans_4 & ((ans_0 & 
ans_1) | (ans_5 & ans_6)));
+                       possible &= ans_13 | (ans_7 & ans_8) | (ans_2_3);
+                       possible &= ans_8 | (ans_2_3) | (ans_13 & ans_14);
+                   }
+                   {
+                       const unsigned int ans_11_12 = ans_11 & ans_12;
+                       possible &= ans_3 | (ans_10 & ans_11_12 & ((ans_8 & 
ans_9) | (ans_13 & ans_14)));
+                       possible &= ans_1 | (ans_11_12) | (ans_6 & ans_7);
+                       possible &= ans_6 | (ans_0 & ans_1) | (ans_11_12);
+                   }
+                   {
+                       const unsigned int ans_3_4 = ans_3 & ans_4;
+                       possible &= ans_9 | (ans_3_4) | (ans_14 & ans_15);
+                       possible &= ans_14 | (ans_8 & ans_9) | (ans_3_4);
+                   }
+                   {
+                       const unsigned int ans_10_11 = ans_10 & ans_11;
+                       possible &= ans_5 | (ans_15 & ans_0) | (ans_10_11);
+                       possible &= ans_0 | (ans_10_11) | (ans_5 & ans_6);
+                   }
+                   if (!possible)
+                       continue;
+
+               }
+
+               //if(possible & 0x0f) //Does this make it faster?
+               {
+                   if(possible & (1<< 0))
+                           corners.push_back(ImageRef(y, x + 0));
+                   if(possible & (1<< 1))
+                           corners.push_back(ImageRef(y, x + 1));
+                   if(possible & (1<< 2))
+                           corners.push_back(ImageRef(y, x + 2));
+                   if(possible & (1<< 3))
+                           corners.push_back(ImageRef(y, x + 3));
+                   if(possible & (1<< 4))
+                           corners.push_back(ImageRef(y, x + 4));
+                   if(possible & (1<< 5))
+                           corners.push_back(ImageRef(y, x + 5));
+                   if(possible & (1<< 6))
+                           corners.push_back(ImageRef(y, x + 6));
+                   if(possible & (1<< 7))
+                           corners.push_back(ImageRef(y, x + 7));
+               }
+               //if(possible & 0xf0) //Does this make it faster?
+               {
+                   if(possible & (1<< 8))
+                           corners.push_back(ImageRef(y, x + 8));
+                   if(possible & (1<< 9))
+                           corners.push_back(ImageRef(y, x + 9));
+                   if(possible & (1<<10))
+                           corners.push_back(ImageRef(y, x +10));
+                   if(possible & (1<<11))
+                           corners.push_back(ImageRef(y, x +11));
+                   if(possible & (1<<12))
+                           corners.push_back(ImageRef(y, x +12));
+                   if(possible & (1<<13))
+                           corners.push_back(ImageRef(y, x +13));
+                   if(possible & (1<<14))
+                           corners.push_back(ImageRef(y, x +14));
+                   if(possible & (1<<15))
+                           corners.push_back(ImageRef(y, x +15));
+               }
+           }
+
+           for(int x=xend; x < I.size().x - 3; x++)
+               if(is_corner_9<Less>(&I[y][x], I.row_stride(), barrier) || 
is_corner_9<Greater>(&I[y][x], I.row_stride(), barrier))
+                   corners.push_back(ImageRef(x, y));
+       }
+    }
+
+    void fast_corner_detect_9(const BasicImage<byte>& I, 
std::vector<ImageRef>& corners, int barrier)
+    {
+       if (I.size().x < 22) {
+           fast_corner_detect_plain_9(I,corners,barrier);
+           return;
+       } else if (I.size().x < 22 || I.size().y < 7)
+           return;
+
+       if (is_aligned<16>(I[0]) && is_aligned<16>(I[1]))
+           faster_corner_detect_9<true>(I, corners, barrier);
+       else
+           faster_corner_detect_9<false>(I, corners, barrier);
+    }
+
+}

Index: cvd_src/faster_corner_utilities.h
===================================================================
RCS file: cvd_src/faster_corner_utilities.h
diff -N cvd_src/faster_corner_utilities.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ cvd_src/faster_corner_utilities.h   11 Aug 2007 06:07:35 -0000      1.1
@@ -0,0 +1,26 @@
+#ifndef CVD_INTERNAL_INC_FAST_CORNER_UTILITIES_H
+#define CVD_INTERNAL_INC_FAST_CORNER_UTILITIES_H
+
+#include <emmintrin.h>
+
+namespace CVD
+{
+
+    struct Less { template <class T1, class T2> static bool eval(const T1 a, 
const T2 b) { return a < b; }};
+    struct Greater { template <class T1, class T2> static bool eval(const T1 
a, const T2 b) { return b < a; }};
+
+#define CHECK_BARRIER(lo, hi, other, flags)                            \
+    {                                                                  \
+       __m128i diff = _mm_subs_epu8(lo, other);                        \
+       __m128i diff2 = _mm_subs_epu8(other, hi);                       \
+       __m128i z = _mm_setzero_si128();                                \
+       diff = _mm_cmpeq_epi8(diff, z);                                 \
+       diff2 = _mm_cmpeq_epi8(diff2, z);                               \
+       flags = ~(_mm_movemask_epi8(diff) | (_mm_movemask_epi8(diff2) << 16)); \
+    }
+     
+    template <bool Aligned> inline __m128i load_si128(const void* addr) { 
return _mm_loadu_si128((const __m128i*)addr); }
+    template <> inline __m128i load_si128<true>(const void* addr) { return 
_mm_load_si128((const __m128i*)addr); }
+
+}
+#endif

Index: cvd_src/slower_corner_10.cxx
===================================================================
RCS file: cvd_src/slower_corner_10.cxx
diff -N cvd_src/slower_corner_10.cxx
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ cvd_src/slower_corner_10.cxx        11 Aug 2007 06:07:35 -0000      1.1
@@ -0,0 +1,10 @@
+#include <cvd/fast_corner.h>
+#include "cvd_src/fast/prototypes.h"
+
+namespace CVD
+{
+       void fast_corner_detect_10(const SubImage<byte>& i, 
std::vector<ImageRef>& corners, int b)
+       {
+               fast_corner_detect_plain_10(i, corners, b);
+       }
+}

Index: cvd_src/slower_corner_12.cxx
===================================================================
RCS file: cvd_src/slower_corner_12.cxx
diff -N cvd_src/slower_corner_12.cxx
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ cvd_src/slower_corner_12.cxx        11 Aug 2007 06:07:35 -0000      1.1
@@ -0,0 +1,10 @@
+#include <cvd/fast_corner.h>
+#include "cvd_src/fast/prototypes.h"
+
+namespace CVD
+{
+       void fast_corner_detect_12(const SubImage<byte>& i, 
std::vector<ImageRef>& corners, int b)
+       {
+               fast_corner_detect_plain_12(i, corners, b);
+       }
+}

Index: cvd_src/slower_corner_9.cxx
===================================================================
RCS file: cvd_src/slower_corner_9.cxx
diff -N cvd_src/slower_corner_9.cxx
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ cvd_src/slower_corner_9.cxx 11 Aug 2007 06:07:35 -0000      1.1
@@ -0,0 +1,10 @@
+#include <cvd/fast_corner.h>
+#include "cvd_src/fast/prototypes.h"
+
+namespace CVD
+{
+       void fast_corner_detect_9(const SubImage<byte>& i, 
std::vector<ImageRef>& corners, int b)
+       {
+               fast_corner_detect_plain_9(i, corners, b);
+       }
+}

Index: cvd_src/faster_corner.cxx
===================================================================
RCS file: cvd_src/faster_corner.cxx
diff -N cvd_src/faster_corner.cxx
--- cvd_src/faster_corner.cxx   3 Aug 2007 23:13:24 -0000       1.8
+++ /dev/null   1 Jan 1970 00:00:00 -0000
@@ -1,567 +0,0 @@
-#include <cvd/fast_corner.h>
-
-#include <vector>
-#include <list>
-#include <cvd/utility.h>
-using namespace CVD;
-using namespace std;
-
-#if (CVD_HAVE_EMMINTRIN && CVD_HAVE_SSE2)
-#include <emmintrin.h>
-
-#include "cvd_src/fast/prototypes.h"
-
-namespace CVD
-{
-    #include "cvd_src/corner_9.h"    
-    #include "cvd_src/corner_10.h"    
-    #include "cvd_src/corner_12.h"    
-
-    struct Less { template <class T1, class T2> static bool eval(const T1 a, 
const T2 b) { return a < b; }};
-    struct Greater { template <class T1, class T2> static bool eval(const T1 
a, const T2 b) { return b < a; }};
-
-    template <int I, int N> struct BitCheck {
-       template <class C> static inline void eval(unsigned int three, const 
byte* p, const int w, const int barrier, C& corners) {
-           const int BIT = 1<<I;
-           if (three & BIT) {
-               if (three & (BIT << 16)) {
-                   if (is_corner_12<Greater>(p, w, *p+barrier)) 
-                       corners.push_back(p);
-               } else {
-                   if (is_corner_12<Less>(p, w, *p-barrier))
-                       corners.push_back(p);
-               }
-           }
-           BitCheck<I+1,N>::eval(three, p+1, w, barrier, corners);
-       }
-    };
-
-    template <int N> struct BitCheck<N,N> { 
-       template <class C> static inline void eval(unsigned int three, const 
byte* p, const int w, const int barrier, C& corners) {} 
-    };
-
-    template <int CHUNKS, class C> inline void process_16(unsigned int three, 
const byte* p, const int w, const int barrier, C& corners)
-    {    
-       three |= (three >> 16);
-       const int BITS = 16/CHUNKS;
-       const int mask = ((1<<BITS)-1);
-       for (int i=0; i<CHUNKS; ++i) {
-           if (three & mask)
-               BitCheck<0,BITS>::eval(three, p, w, barrier, corners);
-           p += BITS;
-           three >>= BITS;
-       }
-    }
-
-#define CHECK_BARRIER(lo, hi, other, flags)                            \
-    {                                                                  \
-       __m128i diff = _mm_subs_epu8(lo, other);                        \
-       __m128i diff2 = _mm_subs_epu8(other, hi);                       \
-       __m128i z = _mm_setzero_si128();                                \
-       diff = _mm_cmpeq_epi8(diff, z);                                 \
-       diff2 = _mm_cmpeq_epi8(diff2, z);                               \
-       flags = ~(_mm_movemask_epi8(diff) | (_mm_movemask_epi8(diff2) << 16)); \
-    }
-    
-    
-    template <bool Aligned> inline __m128i load_si128(const void* addr) { 
return _mm_loadu_si128((const __m128i*)addr); }
-    template <> inline __m128i load_si128<true>(const void* addr) { return 
_mm_load_si128((const __m128i*)addr); }
-       
-    #ifdef CVD_HAVE_FAST_12
-
-    template <bool Aligned> void faster_corner_detect_12(const 
BasicImage<byte>& I, std::vector<ImageRef>& corners, int barrier)
-    {
-       const int w = I.size().x;
-       const int stride = 3*w;
-       typedef std::list<const byte*> Passed;
-       Passed passed;
-    
-       // The compiler refuses to reserve a register for this,
-       // even though xmm6 and xmm7 go unused.
-       // It loads it from memory each time.  I am stymied.
-       register const __m128i barriers = _mm_set1_epi8((byte)barrier);
-
-       for (int i=3; i<I.size().y-3; ++i) {
-           const byte* p = I[i];
-           for (int j=0; j<w/16; ++j, p+=16) {
-               __m128i lo, hi;
-               {
-                   const __m128i here = load_si128<Aligned>((const 
__m128i*)(p));
-                   lo = _mm_subs_epu8(here, barriers);
-                   hi = _mm_adds_epu8(barriers, here);
-               }
-               const __m128i above = load_si128<Aligned>((const 
__m128i*)(p-stride));
-               const __m128i below = load_si128<Aligned>((const 
__m128i*)(p+stride));
-               unsigned int up_flags, down_flags;
-               CHECK_BARRIER(lo, hi, above, up_flags);
-               CHECK_BARRIER(lo, hi, below, down_flags);
-               const unsigned int either_ud = up_flags | down_flags;
-               if (either_ud) {
-                   unsigned int left_flags;
-                   {
-                       const __m128i other = _mm_loadu_si128((const 
__m128i*)(p-3));
-                       CHECK_BARRIER(lo, hi, other, left_flags);
-                   }
-                   const unsigned int both_ud = up_flags & down_flags;
-                   if (both_ud | (either_ud&left_flags)) {
-                       unsigned int right_flags;
-                       {
-                           const __m128i other = _mm_loadu_si128((const 
__m128i*)(p+3));
-                           CHECK_BARRIER(lo, hi, other, right_flags);
-                       }
-                       const unsigned int at_least_three = (either_ud & 
(left_flags & right_flags)) | (both_ud & (left_flags | right_flags));
-                       if (at_least_three) {
-                           process_16<4>(at_least_three, p, w, barrier, 
passed);
-                       }
-                   }
-               }
-           }
-           passed.push_back(0);
-       }
-       corners.reserve(passed.size());
-       int row = 3;
-       const byte* row_start = I[3];
-       for (Passed::iterator it = passed.begin(); it != passed.end(); ++it) {
-           if (*it == 0) {
-               row_start=I[++row];
-               continue;
-           }
-           int x = *it - row_start;
-           if (x > 2 && x < w-3)
-               corners.push_back(ImageRef(x, row));
-       }
-    }
-
-
-    void fast_corner_detect_12(const BasicImage<byte>& I, 
std::vector<ImageRef>& corners, int barrier)
-    {
-       if (I.size().x < 22) {
-           fast_corner_detect_plain_12(I,corners,barrier);
-           return;
-       } else if (I.size().x < 22 || I.size().y < 7)
-           return;
-
-       if (is_aligned<16>(I[0]) && is_aligned<16>(I[1]))
-           faster_corner_detect_12<true>(I, corners, barrier);
-       else
-           faster_corner_detect_12<false>(I, corners, barrier);
-    }
-    #endif
-
-    #ifdef CVD_HAVE_FAST_10
-
-    template <bool Aligned> void faster_corner_detect_10(const 
BasicImage<byte>& I, std::vector<ImageRef>& corners, const int barrier)
-    {
-       const int w = I.size().x;
-       const int stride = 3*w;
- 
-       // The compiler refuses to reserve a register for this
-       register const __m128i barriers = _mm_set1_epi8((byte)barrier);
-       const byte* const end = I[I.size().y - 3]-16;
-
-       int xend = I.size().x - 3;
-       xend -= I.size().x % 16;
-
-       for(int y=3; y < I.size().y - 3; y++)
-       {
-           for(int x=3; x < 16; x++)
-               if(is_corner_10<Less>(&I[y][x], I.row_stride(), barrier) || 
is_corner_10<Greater>(&I[y][x], I.row_stride(), barrier))
-                   corners.push_back(ImageRef(x, y));
-           
-           for(int x=16; x < xend; x++)
-           {
-               const byte* p = &I[y][x];
-               __m128i lo, hi;
-               {
-                   const __m128i here = load_si128<Aligned>((const 
__m128i*)(p));
-                   lo = _mm_subs_epu8(here, barriers);
-                   hi = _mm_adds_epu8(barriers, here);
-               }
-               unsigned int ans_b, ans_e;
-               {
-                   __m128i top = load_si128<Aligned>((const 
__m128i*)(p-stride));
-                   __m128i bottom = load_si128<Aligned>((const 
__m128i*)(p+stride));
-
-                   CHECK_BARRIER(lo, hi, top, ans_b);
-                   CHECK_BARRIER(lo, hi, bottom, ans_e);
-                   if (!(ans_b | ans_e))
-                       continue;        
-               }
-
-               unsigned int ans_m, ans_p, possible;
-               {
-                   __m128i ul = _mm_loadu_si128((const __m128i*)(p-2-2*w));
-                   __m128i lr = _mm_loadu_si128((const __m128i*)(p+2+2*w));
-                   CHECK_BARRIER(lo, hi, ul, ans_m);
-                   CHECK_BARRIER(lo, hi, lr, ans_p);
-                   possible = (ans_m & ans_b) | (ans_e & ans_p);
-                   if (!possible)
-                       continue;
-               }
-
-               unsigned int ans_o, ans_n;
-               {
-                   __m128i ll = _mm_loadu_si128((const __m128i*)(p-2+2*w));
-                   __m128i ur = _mm_loadu_si128((const __m128i*)(p+2-2*w));
-                   CHECK_BARRIER(lo, hi, ll, ans_o);
-                   CHECK_BARRIER(lo, hi, ur, ans_n);
-                   possible &= ans_o | (ans_b & ans_n);
-                   possible &= ans_n | (ans_e & ans_o);
-                   if (!possible)
-                       continue;
-               }
-     
-               unsigned int ans_h, ans_k;
-               {
-                   __m128i left = _mm_loadu_si128((const __m128i*)(p-3));
-                   __m128i right = _mm_loadu_si128((const __m128i*)(p+3));
-                   CHECK_BARRIER(lo, hi, left, ans_h);
-                   CHECK_BARRIER(lo, hi, right, ans_k);
-                   possible &= ans_h | (ans_n & ans_k & ans_p);
-                   possible &= ans_k | (ans_m & ans_h & ans_o);
-                   if (!possible)
-                       continue;
-               }
-               
-               unsigned int ans_a, ans_c;
-               {
-                   __m128i a = _mm_loadu_si128((const __m128i*)(p-1-stride));
-                   __m128i c = _mm_insert_epi16(_mm_srli_si128(a,2), *(const 
unsigned short*)(p+15-stride), 7);
-                   //__m128i c = _mm_loadu_si128((const __m128i*)(p+1-stride));
-                   CHECK_BARRIER(lo, hi, a, ans_a);
-                   CHECK_BARRIER(lo, hi, c, ans_c);
-                   possible &= ans_a | (ans_e & ans_p);
-                   possible &= ans_c | (ans_o & ans_e);
-                   if (!possible)
-                       continue;
-               }
-
-               unsigned int ans_d, ans_f;
-               {
-                   __m128i d = _mm_loadu_si128((const __m128i*)(p-1+stride));
-                   __m128i f = _mm_insert_epi16(_mm_srli_si128(d,2), *(const 
unsigned short*)(p+15+stride), 7);
-                   //__m128i f = _mm_loadu_si128((const __m128i*)(p+1+stride));
-                   CHECK_BARRIER(lo, hi, d, ans_d);
-                   CHECK_BARRIER(lo, hi, f, ans_f);
-                   const unsigned int ans_abc = ans_a & ans_b & ans_c;
-                   possible &= ans_d | (ans_abc & ans_n);
-                   possible &= ans_f | (ans_m & ans_abc);
-                   if (!possible)
-                       continue;
-               }
-
-               unsigned int ans_g, ans_i;
-               {
-                   __m128i g = _mm_loadu_si128((const __m128i*)(p-3-w));
-                   __m128i ii = _mm_loadu_si128((const __m128i*)(p-3+w));
-                   CHECK_BARRIER(lo, hi, g, ans_g);
-                   CHECK_BARRIER(lo, hi, ii, ans_i);
-                   possible &= ans_g | (ans_f & ans_p & ans_k);
-                   possible &= ans_i | (ans_c & ans_n & ans_k);
-                   if (!possible)
-                       continue;
-               }
-
-               unsigned int ans_j, ans_l;
-               {
-                   __m128i jj = _mm_loadu_si128((const __m128i*)(p+3-w));
-                   __m128i l = _mm_loadu_si128((const __m128i*)(p+3+w));
-                   CHECK_BARRIER(lo, hi, jj, ans_j);
-                   CHECK_BARRIER(lo, hi, l, ans_l);
-                   const unsigned int ans_ghi = ans_g & ans_h & ans_i;
-                   possible &= ans_j | (ans_d & ans_o & ans_ghi);
-                   possible &= ans_l | (ans_m & ans_a & ans_ghi);
-                   if (!possible)
-                       continue;
-               }
-
-               //if(possible & 0x0f) //Does this make it faster?
-               {
-                   if(possible & (1<< 0))
-                           corners.push_back(ImageRef(y, x + 0));
-                   if(possible & (1<< 1))
-                           corners.push_back(ImageRef(y, x + 1));
-                   if(possible & (1<< 2))
-                           corners.push_back(ImageRef(y, x + 2));
-                   if(possible & (1<< 3))
-                           corners.push_back(ImageRef(y, x + 3));
-                   if(possible & (1<< 4))
-                           corners.push_back(ImageRef(y, x + 4));
-                   if(possible & (1<< 5))
-                           corners.push_back(ImageRef(y, x + 5));
-                   if(possible & (1<< 6))
-                           corners.push_back(ImageRef(y, x + 6));
-                   if(possible & (1<< 7))
-                           corners.push_back(ImageRef(y, x + 7));
-               }
-               //if(possible & 0xf0) //Does this make it faster?
-               {
-                   if(possible & (1<< 8))
-                           corners.push_back(ImageRef(y, x + 8));
-                   if(possible & (1<< 9))
-                           corners.push_back(ImageRef(y, x + 9));
-                   if(possible & (1<<10))
-                           corners.push_back(ImageRef(y, x +10));
-                   if(possible & (1<<11))
-                           corners.push_back(ImageRef(y, x +11));
-                   if(possible & (1<<12))
-                           corners.push_back(ImageRef(y, x +12));
-                   if(possible & (1<<13))
-                           corners.push_back(ImageRef(y, x +13));
-                   if(possible & (1<<14))
-                           corners.push_back(ImageRef(y, x +14));
-                   if(possible & (1<<15))
-                           corners.push_back(ImageRef(y, x +15));
-               }
-           }
-
-           for(int x=xend; x < I.size().x - 3; x++)
-               if(is_corner_10<Less>(&I[y][x], I.row_stride(), barrier) || 
is_corner_10<Greater>(&I[y][x], I.row_stride(), barrier))
-                   corners.push_back(ImageRef(x, y));
-       }
-    }
-
-    void fast_corner_detect_10(const BasicImage<byte>& I, 
std::vector<ImageRef>& corners, int barrier)
-    {
-       if (I.size().x < 22) {
-           fast_corner_detect_plain_10(I,corners,barrier);
-           return;
-       } else if (I.size().x < 22 || I.size().y < 7)
-           return;
-
-       if (is_aligned<16>(I[0]) && is_aligned<16>(I[1]))
-           faster_corner_detect_10<true>(I, corners, barrier);
-       else
-           faster_corner_detect_10<false>(I, corners, barrier);
-    }
-
-    #endif
-
-    #ifdef CVD_HAVE_FAST_9
-
-    template <bool Aligned> void faster_corner_detect_9(const 
BasicImage<byte>& I, std::vector<ImageRef>& corners, const int barrier)
-    {
-       const int w = I.size().x;
-       const int stride = 3*w;
-       typedef std::list<std::pair<const byte*, unsigned int> > Passed;
-       Passed passed;
-       // The compiler refuses to reserve a register for this
-       register const __m128i barriers = _mm_set1_epi8((byte)barrier);
-       const byte* const end = I[I.size().y - 3]-16;
-
-       for (const byte* p = I[3] + 16; p!=end; p+=16) {
-           __m128i lo, hi;
-           {
-               const __m128i here = load_si128<Aligned>((const __m128i*)(p));
-               lo = _mm_subs_epu8(here, barriers);
-               hi = _mm_adds_epu8(barriers, here);
-           }
-           unsigned int ans_0, ans_8, possible;
-           {
-               __m128i top = load_si128<Aligned>((const __m128i*)(p-stride));
-               __m128i bottom = load_si128<Aligned>((const 
__m128i*)(p+stride));
-
-               CHECK_BARRIER(lo, hi, top, ans_0);
-               CHECK_BARRIER(lo, hi, bottom, ans_8);
-               possible = ans_0 | ans_8;
-               if (!possible)
-                   continue;    
-           }
-
-           unsigned int ans_15, ans_1;
-           {
-               __m128i a = _mm_loadu_si128((const __m128i*)(p-1-stride));
-               __m128i c = _mm_insert_epi16(_mm_srli_si128(a,2), *(const 
unsigned short*)(p+15-stride), 7);
-               CHECK_BARRIER(lo, hi, a, ans_15);
-               CHECK_BARRIER(lo, hi, c, ans_1);
-               possible &= ans_8 | (ans_15 & ans_1);
-               if (!possible)
-                   continue;
-           }
-
-           unsigned int ans_9, ans_7;
-           {
-               __m128i d = _mm_loadu_si128((const __m128i*)(p-1+stride));
-               __m128i f = _mm_insert_epi16(_mm_srli_si128(d,2), *(const 
unsigned short*)(p+15+stride), 7);
-               CHECK_BARRIER(lo, hi, d, ans_9);
-               CHECK_BARRIER(lo, hi, f, ans_7);
-               possible &= ans_9 | (ans_0 & ans_1);
-               possible &= ans_7 | (ans_15 & ans_0);
-               if (!possible)
-                   continue;
-           }
-
-           unsigned int ans_12, ans_4;
-           {
-               __m128i left = _mm_loadu_si128((const __m128i*)(p-3));
-               __m128i right = _mm_loadu_si128((const __m128i*)(p+3));
-               CHECK_BARRIER(lo, hi, left, ans_12);
-               CHECK_BARRIER(lo, hi, right, ans_4);
-               possible &= ans_12 | (ans_4 & (ans_1 | ans_7));
-               possible &= ans_4 | (ans_12 & (ans_9 | ans_15));
-               if (!possible)
-                   continue;
-           }
-
-           unsigned int ans_14, ans_6;
-           {
-               __m128i ul = _mm_loadu_si128((const __m128i*)(p-2-2*w));
-               __m128i lr = _mm_loadu_si128((const __m128i*)(p+2+2*w));
-               CHECK_BARRIER(lo, hi, ul, ans_14);
-               CHECK_BARRIER(lo, hi, lr, ans_6);
-               {
-                   const unsigned int ans_6_7 = ans_6 & ans_7;
-                   possible &= ans_14 | (ans_6_7 & (ans_4 | (ans_8 & ans_9)));
-                   possible &= ans_1 | (ans_6_7) | ans_12;
-               }
-               {
-                   const unsigned int ans_14_15 = ans_14 & ans_15;
-                   possible &= ans_6 | (ans_14_15 & (ans_12 | (ans_0 & 
ans_1)));
-                   possible &= ans_9 | (ans_14_15) | ans_4;
-               }
-               if (!possible)
-                   continue;
-           }
-
-           unsigned int ans_10, ans_2;
-           {
-               __m128i ll = _mm_loadu_si128((const __m128i*)(p-2+2*w));
-               __m128i ur = _mm_loadu_si128((const __m128i*)(p+2-2*w));
-               CHECK_BARRIER(lo, hi, ll, ans_10);
-               CHECK_BARRIER(lo, hi, ur, ans_2);
-               {
-                   const unsigned int ans_1_2 = ans_1 & ans_2;
-                   possible &= ans_10 | (ans_1_2 & ((ans_0 & ans_15) | ans_4));
-                   possible &= ans_12 | (ans_1_2) | (ans_6 & ans_7);
-               }
-               {
-                   const unsigned int ans_9_10 = ans_9 & ans_10;
-                   possible &= ans_2 | (ans_9_10 & ((ans_7 & ans_8) | ans_12));
-                   possible &= ans_4 | (ans_9_10) | (ans_14 & ans_15);
-               }
-               possible &= ans_8 | ans_14 | ans_2;
-               possible &= ans_0 | ans_10 | ans_6;
-               if (!possible)
-                   continue;
-           }
-
-           unsigned int ans_13, ans_5;
-           {
-               __m128i g = _mm_loadu_si128((const __m128i*)(p-3-w));
-               __m128i l = _mm_loadu_si128((const __m128i*)(p+3+w));
-               CHECK_BARRIER(lo, hi, g, ans_13);
-               CHECK_BARRIER(lo, hi, l, ans_5);
-               const unsigned int ans_15_0 = ans_15 & ans_0; 
-               const unsigned int ans_7_8 = ans_7 & ans_8;
-               {
-                   const unsigned int ans_12_13 = ans_12 & ans_13;
-                   possible &= ans_5 | (ans_12_13 & ans_14 & ((ans_15_0) | 
ans_10));
-                   possible &= ans_7 | (ans_1 & ans_2) | (ans_12_13);
-                   possible &= ans_2 | (ans_12_13) | (ans_7_8);
-               }
-               {
-                   const unsigned int ans_4_5 = ans_4 & ans_5;
-                   const unsigned int ans_9_10 = ans_9 & ans_10;
-                   possible &= ans_13 | (ans_4_5 & ans_6 & ((ans_7_8) | 
ans_2));
-                   possible &= ans_15 | (ans_4_5) | (ans_9_10);
-                   possible &= ans_10 | (ans_4_5) | (ans_15_0);
-                   possible &= ans_15 | (ans_9_10) | (ans_4_5);
-               }
-
-               possible &= ans_8 | (ans_13 & ans_14) | ans_2;
-               possible &= ans_0 | (ans_5 & ans_6) | ans_10;
-               if (!possible)
-                   continue;
-           }
-
-
-           unsigned int ans_11, ans_3;
-           {
-               __m128i ii = _mm_loadu_si128((const __m128i*)(p-3+w));
-               __m128i jj = _mm_loadu_si128((const __m128i*)(p+3-w));
-               CHECK_BARRIER(lo, hi, ii, ans_11);
-               CHECK_BARRIER(lo, hi, jj, ans_3);
-               {
-                   const unsigned int ans_2_3 = ans_2 & ans_3;
-                   possible &= ans_11 | (ans_2_3 & ans_4 & ((ans_0 & ans_1) | 
(ans_5 & ans_6)));
-                   possible &= ans_13 | (ans_7 & ans_8) | (ans_2_3);
-                   possible &= ans_8 | (ans_2_3) | (ans_13 & ans_14);
-               }
-               {
-                   const unsigned int ans_11_12 = ans_11 & ans_12;
-                   possible &= ans_3 | (ans_10 & ans_11_12 & ((ans_8 & ans_9) 
| (ans_13 & ans_14)));
-                   possible &= ans_1 | (ans_11_12) | (ans_6 & ans_7);
-                   possible &= ans_6 | (ans_0 & ans_1) | (ans_11_12);
-               }
-               {
-                   const unsigned int ans_3_4 = ans_3 & ans_4;
-                   possible &= ans_9 | (ans_3_4) | (ans_14 & ans_15);
-                   possible &= ans_14 | (ans_8 & ans_9) | (ans_3_4);
-               }
-               {
-                   const unsigned int ans_10_11 = ans_10 & ans_11;
-                   possible &= ans_5 | (ans_15 & ans_0) | (ans_10_11);
-                   possible &= ans_0 | (ans_10_11) | (ans_5 & ans_6);
-               }
-               if (!possible)
-                   continue;
-
-           }
-           passed.push_back(make_pair(p,(possible | (possible>>16))&0xFFFF));
-       }
-       corners.reserve(passed.size()*2);
-       int row = 3;
-       const byte* row_start = I[3];
-       // Check first 16
-       {
-           for (int j=3; j<16; ++j)
-               if (is_corner_9<Less>(row_start + j, w, row_start[j]-barrier) 
|| 
-                   is_corner_9<Greater>(row_start + j, w, 
row_start[j]+barrier))
-                   corners.push_back(ImageRef(j,3));
-       }
-       for (Passed::iterator it = passed.begin(); it != passed.end(); ++it) {
-           while (it->first >= row_start + w) {
-               ++row;
-               row_start += w;
-           }
-           int x = it->first - row_start;
-           unsigned int bits = it->second;
-           if (x == 0) {
-               x = 3;
-               bits >>= 3;
-           } else if (x >= w-18)
-               bits &= 0x1FFF;
-           for (;bits;bits>>=1, ++x) {
-               if (bits&0x1)
-                   corners.push_back(ImageRef(x,row));
-           }
-       }
-       // Check last 16
-       {
-           row_start = I[I.size().y-4];
-           for (int j=w-16; j<w-3; ++j)
-               if (is_corner_9<Less>(row_start + j, w, row_start[j]-barrier) 
|| 
-                   is_corner_9<Greater>(row_start + j, w, 
row_start[j]+barrier))
-                   corners.push_back(ImageRef(j,I.size().y-4));
-       }
-    }
-
-    void fast_corner_detect_9(const BasicImage<byte>& I, 
std::vector<ImageRef>& corners, int barrier)
-    {
-       if (I.size().x < 22) {
-           fast_corner_detect_plain_9(I,corners,barrier);
-           return;
-       } else if (I.size().x < 22 || I.size().y < 7)
-           return;
-
-       if (is_aligned<16>(I[0]) && is_aligned<16>(I[1]))
-           faster_corner_detect_9<true>(I, corners, barrier);
-       else
-           faster_corner_detect_9<false>(I, corners, barrier);
-    }
-
-    #endif
-
-}
-
-#endif

Index: cvd_src/slower_corner.cxx
===================================================================
RCS file: cvd_src/slower_corner.cxx
diff -N cvd_src/slower_corner.cxx
--- cvd_src/slower_corner.cxx   23 Jul 2007 20:43:23 -0000      1.2
+++ /dev/null   1 Jan 1970 00:00:00 -0000
@@ -1,28 +0,0 @@
-#include <cvd/fast_corner.h>
-
-using namespace std;
-
-namespace CVD
-{
-
-
-
-       void fast_corner_detect_plain_9(const SubImage<byte>& i, 
vector<ImageRef>& corners, int b);
-    void fast_corner_detect_plain_10(const SubImage<byte>& i, 
vector<ImageRef>& corners, int b);
-    void fast_corner_detect_plain_12(const SubImage<byte>& i, 
vector<ImageRef>& corners, int b);
-
-       void fast_corner_detect_9(const SubImage<byte>& i, vector<ImageRef>& 
corners, int b)
-       {
-               fast_corner_detect_plain_9(i, corners, b);
-       }
-
-       void fast_corner_detect_10(const SubImage<byte>& i, vector<ImageRef>& 
corners, int b)
-       {
-               fast_corner_detect_plain_9(i, corners, b);
-       }
-
-       void fast_corner_detect_2(const SubImage<byte>& i, vector<ImageRef>& 
corners, int b)
-       {
-               fast_corner_detect_plain_9(i, corners, b);
-       }
-}




reply via email to

[Prev in Thread] Current Thread [Next in Thread]