libcvd-members
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[libcvd-members] libcvd/cvd_src/i686 yuv411_to_stuff_MMX_64.C


From: Paul McIlroy
Subject: [libcvd-members] libcvd/cvd_src/i686 yuv411_to_stuff_MMX_64.C
Date: Thu, 12 Nov 2009 14:23:43 +0000

CVSROOT:        /sources/libcvd
Module name:    libcvd
Changes by:     Paul McIlroy <paulmcilroy>      09/11/12 14:23:43

Modified files:
        cvd_src/i686   : yuv411_to_stuff_MMX_64.C 

Log message:
        Fixed a seg fault resulting from the use of the ebp register as a counter.
        Refactored yuv411_to_rgb to free up ebx, and yuv411_to_y to free up ecx,
        for use as the loop counter instead of ebp.

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/libcvd/cvd_src/i686/yuv411_to_stuff_MMX_64.C?cvsroot=libcvd&r1=1.2&r2=1.3

Patches:
Index: yuv411_to_stuff_MMX_64.C
===================================================================
RCS file: /sources/libcvd/libcvd/cvd_src/i686/yuv411_to_stuff_MMX_64.C,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- yuv411_to_stuff_MMX_64.C    17 Jun 2009 12:22:56 -0000      1.2
+++ yuv411_to_stuff_MMX_64.C    12 Nov 2009 14:23:43 -0000      1.3
@@ -24,13 +24,13 @@
 
 #include <cvd/colourspace.h>
 #include <cvd/config.h>
+#include <cvd/timer.h>
 
 namespace CVD
 {
 namespace ColourSpace
 {
 
-
 void yuv411_to_rgb_y(const unsigned char* in, int size, unsigned char* out, 
unsigned char *lum_out)
 {
        const unsigned char* in_end = in + size * 6 / 4;
@@ -244,6 +244,7 @@
                : "eax", "ecx", "edx",  "rdi", "rsi", "mm0", "mm1", "mm2", 
"mm3", "mm4", "mm5", "mm6", "mm7", "r10"
        );
 }
+
 void yuv411_to_rgb(const unsigned  char* in, int size, unsigned char* out)
 {
        //Time: 5.0ms memcpy on number of luma pixels only takes 1.5 ms
@@ -256,7 +257,7 @@
                //Load out and in
                "mov            %0, %%rdi               \n\t"
                "mov            %1, %%rsi               \n\t"
-               "mov            %2, %%ebp               \n\t"
+               "mov            %2, %%ebx               \n\t"
 
 ".Lyuv411dec:                                          \n\t"
                
@@ -299,9 +300,9 @@
                "imul   $147, %%eax                     \n\t"
                "pinsrw $0, %%eax, %%mm0        \n\t"
 
-               "mov    %%ecx, %%ebx            \n\t"   //Calculate and insert 
bu = v * 256
-               "imul   $256, %%ebx                     \n\t"
-               "pinsrw $2, %%ebx, %%mm0        \n\t"
+               "mov    %%ecx, %%eax            \n\t"   //Calculate and insert 
bu = v * 256
+               "imul   $256, %%eax                     \n\t"
+               "pinsrw $2, %%eax, %%mm0        \n\t"
 
                "imul   $-38, %%ecx                     \n\t"   //Calculate and 
insert guv = -38u + -74v
                "imul   $-74, %%edx                     \n\t"
@@ -384,9 +385,9 @@
                "imul   $147, %%eax                     \n\t"
                "pinsrw $0, %%eax, %%mm0        \n\t"
 
-               "mov    %%ecx, %%ebx            \n\t"   //Calculate and insert 
bu = v * 256
-               "imul   $256, %%ebx                     \n\t"
-               "pinsrw $2, %%ebx, %%mm0        \n\t"
+               "mov    %%ecx, %%eax            \n\t"   //Calculate and insert 
bu = v * 256
+               "imul   $256, %%eax                     \n\t"
+               "pinsrw $2, %%eax, %%mm0        \n\t"
 
                "imul   $-38, %%ecx                     \n\t"   //Calculate and 
insert guv = -38u + -74v
                "imul   $-74, %%edx                     \n\t"
@@ -451,7 +452,7 @@
                //Increment counters
                "add            $24, %%rdi              \n\t"
                "add            $12, %%rsi              \n\t"
-               "dec            %%ebp                   \n\t"
+               "dec            %%ebx                   \n\t"
                "jnz            .Lyuv411dec             \n\t"
                "emms                                           \n\t"   //End 
mmx
        //   0          1        2      
@@ -464,30 +465,30 @@
 void yuv411_to_y(const unsigned char* in, int size, unsigned char* out)
 {
        //Time, 2.5ms. Comparison, memcpy on the same number of output 
bytes=1.5ms
-
        size /=16;
        __asm__ __volatile__(
                "mov %0, %%rdi                  \n\t"
                "mov %1, %%rsi                  \n\t"
-               "mov %2, %%ebp                  \n\t"
+               "mov %2, %%ecx                  \n\t"
 ".Lyuvtolum:                                   \n\t"
                "prefetchnta    64(%%rsi)       \n\t"
                "mov  2(%%rsi), %%eax   \n\t"
                "movw 1(%%rsi), %%ax    \n\t"
                "mov  8(%%rsi), %%ebx   \n\t"
                "movw 7(%%rsi), %%bx    \n\t"
-               "mov  14(%%rsi), %%ecx  \n\t"
-               "movw 13(%%rsi), %%cx   \n\t"
-               "mov  20(%%rsi), %%edx  \n\t"
-               "movw 19(%%rsi), %%dx   \n\t"
 
                "movd   %%eax, %%mm0    \n\t"
                "movd   %%ebx, %%mm1    \n\t"
                "psllq  $32, %%mm1              \n\t"
                "por    %%mm1, %%mm0    \n\t"  //mm0 = ebx,eax
 
-               "movd   %%ecx, %%mm1    \n\t"
-               "movd   %%edx, %%mm2    \n\t"
+               "mov  14(%%rsi), %%eax  \n\t"
+               "movw 13(%%rsi), %%ax   \n\t"
+               "mov  20(%%rsi), %%ebx  \n\t"
+               "movw 19(%%rsi), %%bx   \n\t"
+
+               "movd   %%eax, %%mm1    \n\t"
+               "movd   %%ebx, %%mm2    \n\t"
                "psllq  $32, %%mm2              \n\t"
                "por    %%mm2, %%mm1    \n\t"  //mm1 = ebx,eax
 
@@ -496,16 +497,14 @@
 
                "add  $24, %%rsi                \n\t"
                "add  $16, %%rdi                \n\t"
-               "dec  %%ebp                             \n\t"
+               "dec  %%ecx                     \n\t"
                "jnz .Lyuvtolum                 \n\t"
                "emms                                   \n\t"
        :
                : "m" (out), "m" (in), "g" (size)
-               : "rax", "rcx", "rdi", "rsi", "mm0", "mm1", "mm2", "r10", "rbx" 
  );
+               : "rdi", "rsi", "mm0", "mm1", "mm2", "eax", "ebx", "ecx", 
"memory");
 }
 
 }
 }
 
-
-




reply via email to

[Prev in Thread] Current Thread [Next in Thread]