commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 06/06: volk: remove unused spu_lib directory from VOLK.


From: git
Subject: [Commit-gnuradio] [gnuradio] 06/06: volk: remove unused spu_lib directory from VOLK.
Date: Wed, 29 Jan 2014 00:04:26 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

trondeau pushed a commit to branch master
in repository gnuradio.

commit 19d111e2448a58e20ff5c1c80ca69751376b2544
Author: Doug Geiger <address@hidden>
Date:   Tue Jan 28 17:59:35 2014 -0500

    volk: remove unused spu_lib directory from VOLK.
    
    Addresses Issue #638.
---
 volk/spu_lib/gc_spu_macs.h                         | 380 ---------------------
 volk/spu_lib/spu_16s_cmpgt_unaligned.c             | 160 ---------
 volk/spu_lib/spu_16s_vector_subtract_unaligned.c   | 178 ----------
 volk/spu_lib/spu_16s_vector_sum_unaligned.c        | 178 ----------
 .../spu_32fc_pointwise_multiply_unaligned.c        | 222 ------------
 volk/spu_lib/spu_memcpy_unaligned.c                | 290 ----------------
 volk/spu_lib/spu_memset_unaligned.S                | 185 ----------
 7 files changed, 1593 deletions(-)

diff --git a/volk/spu_lib/gc_spu_macs.h b/volk/spu_lib/gc_spu_macs.h
deleted file mode 100644
index e86dce3..0000000
--- a/volk/spu_lib/gc_spu_macs.h
+++ /dev/null
@@ -1,380 +0,0 @@
-/* -*- asm -*- */
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#ifndef INCLUDED_GC_SPU_MACS_H
-#define INCLUDED_GC_SPU_MACS_H
-
-/*
- * This file contains a set of macros that are generally useful when
- * coding in SPU assembler
- *
- * Note that the multi-instruction macros in here may overwrite
- * registers 77, 78, and 79 without warning.
- */
-
-/*
- * defines for all registers
- */
-#define r0     $0
-#define r1     $1
-#define r2     $2
-#define r3     $3
-#define r4     $4
-#define r5     $5
-#define r6     $6
-#define r7     $7
-#define r8     $8
-#define r9     $9
-#define r10    $10
-#define r11    $11
-#define r12    $12
-#define r13    $13
-#define r14    $14
-#define r15    $15
-#define r16    $16
-#define r17    $17
-#define r18    $18
-#define r19    $19
-#define r20    $20
-#define r21    $21
-#define r22    $22
-#define r23    $23
-#define r24    $24
-#define r25    $25
-#define r26    $26
-#define r27    $27
-#define r28    $28
-#define r29    $29
-#define r30    $30
-#define r31    $31
-#define r32    $32
-#define r33    $33
-#define r34    $34
-#define r35    $35
-#define r36    $36
-#define r37    $37
-#define r38    $38
-#define r39    $39
-#define r40    $40
-#define r41    $41
-#define r42    $42
-#define r43    $43
-#define r44    $44
-#define r45    $45
-#define r46    $46
-#define r47    $47
-#define r48    $48
-#define r49    $49
-#define r50    $50
-#define r51    $51
-#define r52    $52
-#define r53    $53
-#define r54    $54
-#define r55    $55
-#define r56    $56
-#define r57    $57
-#define r58    $58
-#define r59    $59
-#define r60    $60
-#define r61    $61
-#define r62    $62
-#define r63    $63
-#define r64    $64
-#define r65    $65
-#define r66    $66
-#define r67    $67
-#define r68    $68
-#define r69    $69
-#define r70    $70
-#define r71    $71
-#define r72    $72
-#define r73    $73
-#define r74    $74
-#define r75    $75
-#define r76    $76
-#define r77    $77
-#define r78    $78
-#define r79    $79
-#define r80    $80
-#define r81    $81
-#define r82    $82
-#define r83    $83
-#define r84    $84
-#define r85    $85
-#define r86    $86
-#define r87    $87
-#define r88    $88
-#define r89    $89
-#define r90    $90
-#define r91    $91
-#define r92    $92
-#define r93    $93
-#define r94    $94
-#define r95    $95
-#define r96    $96
-#define r97    $97
-#define r98    $98
-#define r99    $99
-#define r100   $100
-#define r101   $101
-#define r102   $102
-#define r103   $103
-#define r104   $104
-#define r105   $105
-#define r106   $106
-#define r107   $107
-#define r108   $108
-#define r109   $109
-#define r110   $110
-#define r111   $111
-#define r112   $112
-#define r113   $113
-#define r114   $114
-#define r115   $115
-#define r116   $116
-#define r117   $117
-#define r118   $118
-#define r119   $119
-#define r120   $120
-#define r121   $121
-#define r122   $122
-#define r123   $123
-#define r124   $124
-#define r125   $125
-#define r126   $126
-#define r127   $127
-
-
-#define        lr      r0      // link register
-#define        sp      r1      // stack pointer
-                        // r2 is environment pointer for langs that need it (ALGOL)
-
-#define        retval  r3      // return values are passed in regs starting at r3
-
-#define        arg1    r3      // args are passed in regs starting at r3
-#define        arg2    r4
-#define        arg3    r5
-#define        arg4    r6
-#define        arg5    r7
-#define        arg6    r8
-#define        arg7    r9
-#define        arg8    r10
-#define        arg9    r11
-#define        arg10   r12
-
-//  r3 -  r74 are volatile (caller saves)
-// r75 -  r79 are volatile (scratch regs possibly destroyed by fct prolog/epilog)
-// r80 - r127 are non-volatile (callee saves)
-
-// scratch registers reserved for use by the macros in this file.
-
-#define _gc_t0 r79
-#define        _gc_t1  r78
-#define        _gc_t2  r77
-
-/*
- * ----------------------------------------------------------------
- *                         pseudo ops
- * ----------------------------------------------------------------
- */
-#define PROC_ENTRY(name)               \
-        .text;                         \
-       .p2align 4;                     \
-       .global name;                   \
-       .type   name, @function;        \
-name:
-
-/*
- * ----------------------------------------------------------------
- *                 aliases for common operations
- * ----------------------------------------------------------------
- */
-
-// Move register (even pipe, 2 cycles)
-#define MR(rt, ra)                     or      rt, ra, ra;
-
-// Move register (odd pipe, 4 cycles)
-#define        LMR(rt, ra)                     rotqbyi rt, ra, 0;
-
-// return
-#define        RETURN()                        bi      lr;
-
-// hint for a return
-#define        HINT_RETURN(ret_label)          hbr     ret_label, lr;
-
-// return if zero
-#define BRZ_RETURN(rt)                 biz     rt, lr;
-
-// return if not zero
-#define BRNZ_RETURN(rt)                        binz    rt, lr;
-
-// return if halfword zero
-#define        BRHZ_RETURN(rt)                 bihz    rt, lr;
-
-// return if halfword not zero
-#define BRHNZ_RETURN(rt)               bihnz   rt, lr;
-
-
-/*
- * ----------------------------------------------------------------
- * modulo like things for constant moduli that are powers of 2
- * ----------------------------------------------------------------
- */
-
-// rt = ra & (pow2 - 1)
-#define MODULO(rt, ra, pow2) \
-       andi    rt, ra, (pow2)-1;
-
-// rt = pow2 - (ra & (pow2 - 1))
-#define MODULO_NEG(rt, ra, pow2) \
-       andi    rt, ra, (pow2)-1;               \
-       sfi     rt, rt, (pow2);
-
-// rt = ra & -(pow2)
-#define        ROUND_DOWN(rt, ra, pow2) \
-       andi    rt, ra, -(pow2);
-
-// rt = (ra + (pow2 - 1)) & -(pow2)
-#define ROUND_UP(rt, ra, pow2) \
-       ai      rt, ra, (pow2)-1;               \
-       andi    rt, rt, -(pow2);
-
-/*
- * ----------------------------------------------------------------
- * Splat - replicate a particular slot into all slots
- * Altivec analogs...
- * ----------------------------------------------------------------
- */
-
-// replicate byte from slot s [0,15]
-#define VSPLTB(rt, ra, s) \
-       ilh     _gc_t0, (s)*0x0101;             \
-       shufb   rt, ra, ra, _gc_t0;
-
-// replicate halfword from slot s [0,7]
-#define        VSPLTH(rt, ra, s) \
-       ilh     _gc_t0, 2*(s)*0x0101 + 0x0001;  \
-       shufb   rt, ra, ra, _gc_t0;
-
-// replicate word from slot s [0,3]
-#define VSPLTW(rt, ra, s) \
-       iluh    _gc_t0, 4*(s)*0x0101 + 0x0001;  \
-       iohl    _gc_t0, 4*(s)*0x0101 + 0x0203;  \
-       shufb   rt, ra, ra, _gc_t0;
-
-// replicate double from slot s [0,1]
-#define        VSPLTD(rt, ra, s) \
-       /* sp is always 16-byte aligned */ \
-       cdd     _gc_t0, 8(sp);          /* 0x10111213 14151617 00010203 
04050607 */ \
-       rotqbyi rt, ra, ra, (s) << 3;   /* rotate double into preferred slot    
 */ \
-       shufb   rt, rt, rt, _gc_t0;
-
-/*
- * ----------------------------------------------------------------
- * lots of min/max variations...
- *
- * On a slot by slot basis, compute the min or max
- *
- * U - unsigned, else signed
- * B,H,{} - byte, halfword, word
- * F float
- * ----------------------------------------------------------------
- */
-
-#define MIN_SELB(rt, ra, rb, rc)       selb    rt, ra, rb, rc;
-#define MAX_SELB(rt, ra, rb, rc)       selb    rt, rb, ra, rc;
-
-       // words
-
-#define MIN(rt, ra, rb) \
-       cgt     _gc_t0, ra, rb; \
-       MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define        MAX(rt, ra, rb) \
-       cgt     _gc_t0, ra, rb; \
-       MAX_SELB(rt, ra, rb, _gc_t0)
-
-#define UMIN(rt, ra, rb) \
-       clgt    _gc_t0, ra, rb; \
-       MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define        UMAX(rt, ra, rb) \
-       clgt    _gc_t0, ra, rb; \
-       MAX_SELB(rt, ra, rb, _gc_t0)
-
-       // bytes
-
-#define MINB(rt, ra, rb) \
-       cgtb    _gc_t0, ra, rb; \
-       MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define        MAXB(rt, ra, rb) \
-       cgtb    _gc_t0, ra, rb; \
-       MAX_SELB(rt, ra, rb, _gc_t0)
-
-#define UMINB(rt, ra, rb) \
-       clgtb   _gc_t0, ra, rb; \
-       MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define        UMAXB(rt, ra, rb) \
-       clgtb   _gc_t0, ra, rb; \
-       MAX_SELB(rt, ra, rb, _gc_t0)
-
-       // halfwords
-
-#define MINH(rt, ra, rb) \
-       cgth    _gc_t0, ra, rb; \
-       MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define        MAXH(rt, ra, rb) \
-       cgth    _gc_t0, ra, rb; \
-       MAX_SELB(rt, ra, rb, _gc_t0)
-
-#define UMINH(rt, ra, rb) \
-       clgth   _gc_t0, ra, rb; \
-       MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define        UMAXH(rt, ra, rb) \
-       clgth   _gc_t0, ra, rb; \
-       MAX_SELB(rt, ra, rb, _gc_t0)
-
-       // floats
-
-#define FMIN(rt, ra, rb) \
-       fcgt    _gc_t0, ra, rb; \
-       MIN_SELB(rt, ra, rb, _gc_t0)
-
-#define        FMAX(rt, ra, rb) \
-       fcgt    _gc_t0, ra, rb; \
-       MAX_SELB(rt, ra, rb, _gc_t0)
-
-// Ignoring the sign, select the values with the minimum magnitude
-#define FMINMAG(rt, ra, rb) \
-       fcmgt   _gc_t0, ra, rb; \
-       MIN_SELB(rt, ra, rb, _gc_t0)
-
-// Ignoring the sign, select the values with the maximum magnitude
-#define        FMAXMAG(rt, ra, rb) \
-       fcmgt   _gc_t0, ra, rb; \
-       MAX_SELB(rt, ra, rb, _gc_t0)
-
-
-#endif /* INCLUDED_GC_SPU_MACS_H */
diff --git a/volk/spu_lib/spu_16s_cmpgt_unaligned.c 
b/volk/spu_lib/spu_16s_cmpgt_unaligned.c
deleted file mode 100644
index 8811e68..0000000
--- a/volk/spu_lib/spu_16s_cmpgt_unaligned.c
+++ /dev/null
@@ -1,160 +0,0 @@
-#include<spu_intrinsics.h>
-
-void* libvector_16s_cmpgt_unaligned(void* target, void* src, signed short val, 
unsigned int num_bytes){
-       //loop iterator i
-       int i = 0;
-       void* retval = target;
-
-
-       //put the target and source addresses into qwords
-       vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 
0};
-       vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-
-       //create shuffle masks
-
-       //shuffle mask building blocks:
-       //all from the first vector
-       vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07,
-                                                                 0x08, 0x09, 
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
-       //all from the second vector
-       vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 
0x15, 0x16, 0x17,
-                                                                               
 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
-       //gamma: second half of the second, first half of the first, break at 
(unsigned int)src%16
-       vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned 
int)src%16));
-       vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
-       vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
-       vector unsigned char cmp_res = spu_or(gt_res, eq_res);
-       vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
-       vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned 
int)src%16);
-
-
-
-
-       vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, 
-((unsigned int)target%16));
-       vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned 
int)target%16));
-
-       //alpha: first half of first, second half of second, break at (unsigned 
int)target%16
-       src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-       //delta: first half of first, first half of second, break at (unsigned 
int)target%16
-       vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, 
tgt_second, (vector unsigned char)shuffle_mask_alpha);
-       //epsilon: second half of second, second half of first, break at 
(unsigned int)target%16
-       vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, 
oneup, (vector unsigned char)shuffle_mask_alpha);
-       //zeta: second half of second, first half of first, break at 16 - 
(unsigned int)target%16
-       vector unsigned int shuffle_mask_zeta = 
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
-       //beta: first half of first, second half of second, break at 
num_bytes%16
-       src_cmp = spu_splats((unsigned char)(num_bytes%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_beta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-
-
-
-
-
-       qword src_past;
-       qword src_present;
-       qword tgt_past;
-       qword tgt_present;
-
-       qword in_temp;
-       qword out_temp0;
-       qword out_temp1;
-
-       src_past = si_lqd((qword)address_counter_src, 0);
-       tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
-       vector signed short vec_val = spu_splats(val);
-       vector unsigned short compare;
-       vector unsigned short ones = {1, 1, 1, 1, 1, 1, 1, 1};
-       vector unsigned short after_and;
-
-       for(i = 0; i < num_bytes/16; ++i) {
-
-               src_present = si_lqd((qword)address_counter_src, 16);
-               tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-               in_temp = spu_shuffle(src_present, src_past, (vector unsigned 
char)shuffle_mask_gamma);
-
-               compare = spu_cmpgt((vector signed short) in_temp, vec_val);
-               after_and = spu_and(compare, ones);
-
-
-               out_temp0 = spu_shuffle(tgt_past, (qword)after_and, 
shuffle_mask_delta);
-               out_temp1 = spu_shuffle(tgt_present, (qword)after_and, 
shuffle_mask_epsilon);
-
-               si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-               si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-               tgt_past = out_temp1;
-               src_past = src_present;
-               address_counter_src = spu_add(address_counter_src, 16);
-               address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
-       }
-
-       src_present = si_lqd((qword)address_counter_src, 16);
-       tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
-       in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) 
shuffle_mask_gamma);
-
-       compare = spu_cmpgt((vector signed short) in_temp, vec_val);
-       after_and = spu_and(compare, ones);
-
-
-       qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned 
char) shuffle_mask_zeta);
-       qword meld = spu_shuffle((qword)after_and, target_temp, (vector 
unsigned char)shuffle_mask_beta);
-
-
-
-       out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
-       out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
-       si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-       si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-       return retval;
-}
-
-
-
-/*
-int main(){
-
-       signed short pooh[48];
-       signed short bear[48];
-
-       int i = 0;
-       for(i = 0; i < 48; i += 2){
-               bear[i] = i;
-               bear[i + 1] = -i;
-       }
-
-       vector_gt_16bit(&pooh[0],&bear[0], 0, 48 * sizeof(signed short));
-
-       for(i = 0; i < 48; ++i) {
-               printf("%d, ", pooh[i]);
-       }
-       printf("\n");
-}
-*/
-
diff --git a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c 
b/volk/spu_lib/spu_16s_vector_subtract_unaligned.c
deleted file mode 100644
index ea110c8..0000000
--- a/volk/spu_lib/spu_16s_vector_subtract_unaligned.c
+++ /dev/null
@@ -1,178 +0,0 @@
-#include<spu_intrinsics.h>
-
-void* libvector_16s_vector_subtract_unaligned(void* target,  void* src0, void* 
src1, unsigned int num_bytes){
-       //loop iterator i
-       int i = 0;
-       void* retval = target;
-
-
-       //put the target and source addresses into qwords
-       vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 
0};
-       vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 
,0};
-       vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 
0};
-
-       //create shuffle masks
-
-       //shuffle mask building blocks:
-       //all from the first vector
-       vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07,
-                                                                 0x08, 0x09, 
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
-       //all from the second vector
-       vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 
0x15, 0x16, 0x17,
-                                                                               
 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
-       //gamma: second half of the second, first half of the first, break at 
(unsigned int)src0%16
-       vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned 
int)src0%16));
-       vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
-       vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
-       vector unsigned char cmp_res = spu_or(gt_res, eq_res);
-       vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
-       vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned 
int)src0%16);
-
-       //eta: second half of the second, first half of the first, break at 
(unsigned int)src1%16
-       src_cmp = spu_splats((unsigned char)((unsigned int)src1%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       sixteen_uchar = spu_splats((unsigned char)16);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_eta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned 
int)src1%16);
-
-
-
-
-
-       vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, 
-((unsigned int)target%16));
-       vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned 
int)target%16));
-
-       //alpha: first half of first, second half of second, break at (unsigned 
int)target%16
-       src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-       //delta: first half of first, first half of second, break at (unsigned 
int)target%16
-       vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, 
tgt_second, (vector unsigned char)shuffle_mask_alpha);
-       //epsilon: second half of second, second half of first, break at 
(unsigned int)target%16
-       vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, 
oneup, (vector unsigned char)shuffle_mask_alpha);
-       //zeta: second half of second, first half of first, break at 16 - 
(unsigned int)target%16
-       vector unsigned int shuffle_mask_zeta = 
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
-       //beta: first half of first, second half of second, break at 
num_bytes%16
-       src_cmp = spu_splats((unsigned char)(num_bytes%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_beta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-
-
-
-
-
-       qword src0_past;
-       qword src0_present;
-       qword src1_past;
-       qword src1_present;
-       qword tgt_past;
-       qword tgt_present;
-
-       qword in_temp0;
-       qword in_temp1;
-       qword out_temp0;
-       qword out_temp1;
-
-    vector signed short sum;
-
-       src0_past = si_lqd((qword)address_counter_src0, 0);
-       src1_past = si_lqd((qword)address_counter_src1, 0);
-       tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
-       for(i = 0; i < num_bytes/16; ++i) {
-
-               src0_present = si_lqd((qword)address_counter_src0, 16);
-               src1_present = si_lqd((qword)address_counter_src1, 16);
-               tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-               in_temp0 = spu_shuffle(src0_present, src0_past, (vector 
unsigned char)shuffle_mask_gamma);
-               in_temp1 = spu_shuffle(src1_present, src1_past, (vector 
unsigned char)shuffle_mask_eta);
-
-               sum = spu_sub((vector signed short)in_temp0, (vector signed 
short)in_temp1);
-
-
-               out_temp0 = spu_shuffle(tgt_past, (qword)sum, 
shuffle_mask_delta);
-               out_temp1 = spu_shuffle(tgt_present, (qword)sum, 
shuffle_mask_epsilon);
-
-               si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-               si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-               tgt_past = out_temp1;
-               src0_past = src0_present;
-               src1_past = src1_present;
-               address_counter_src0 = spu_add(address_counter_src0, 16);
-               address_counter_src1 = spu_add(address_counter_src1, 16);
-               address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
-       }
-
-       src0_present = si_lqd((qword)address_counter_src0, 16);
-       src1_present = si_lqd((qword)address_counter_src1, 16);
-       tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
-       in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) 
shuffle_mask_gamma);
-       in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) 
shuffle_mask_eta);
-       sum = spu_sub((vector signed short)in_temp0, (vector signed 
short)in_temp1);
-       qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned 
char) shuffle_mask_zeta);
-       qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned 
char)shuffle_mask_beta);
-
-
-
-       out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
-       out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
-       si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-       si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-       return retval;
-}
-
-
-
-/*
-int main(){
-
-       signed short pooh[48];
-       signed short bear[48];
-       signed short res[48];
-
-       int i = 0;
-       for(i = 0; i < 48; ++i){
-               pooh[i] = i;
-       }
-       for(i = 48; i < 96; ++i){
-               bear[i - 48] = i;
-       }
-
-       vector_subtract_16bit(res, &pooh[0], &bear[0], 48 * sizeof(signed 
short));
-
-       for(i = 0; i < 48; ++i) {
-               printf("%d, ", res[i]);
-       }
-       printf("\n");
-}
-*/
-
diff --git a/volk/spu_lib/spu_16s_vector_sum_unaligned.c 
b/volk/spu_lib/spu_16s_vector_sum_unaligned.c
deleted file mode 100644
index 0097b4f..0000000
--- a/volk/spu_lib/spu_16s_vector_sum_unaligned.c
+++ /dev/null
@@ -1,178 +0,0 @@
-#include<spu_intrinsics.h>
-
-void* libvector_16s_vector_sum_unaligned(void* target,  void* src0, void* 
src1, unsigned int num_bytes){
-       //loop iterator i
-       int i = 0;
-       void* retval = target;
-
-
-       //put the target and source addresses into qwords
-       vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 
0};
-       vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 
,0};
-       vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 
0};
-
-       //create shuffle masks
-
-       //shuffle mask building blocks:
-       //all from the first vector
-       vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07,
-                                                                 0x08, 0x09, 
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
-       //all from the second vector
-       vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 
0x15, 0x16, 0x17,
-                                                                               
 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
-       //gamma: second half of the second, first half of the first, break at 
(unsigned int)src0%16
-       vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned 
int)src0%16));
-       vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
-       vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
-       vector unsigned char cmp_res = spu_or(gt_res, eq_res);
-       vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
-       vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned 
int)src0%16);
-
-       //eta: second half of the second, first half of the first, break at 
(unsigned int)src1%16
-       src_cmp = spu_splats((unsigned char)((unsigned int)src1%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       sixteen_uchar = spu_splats((unsigned char)16);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_eta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned 
int)src1%16);
-
-
-
-
-
-       vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, 
-((unsigned int)target%16));
-       vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned 
int)target%16));
-
-       //alpha: first half of first, second half of second, break at (unsigned 
int)target%16
-       src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-       //delta: first half of first, first half of second, break at (unsigned 
int)target%16
-       vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, 
tgt_second, (vector unsigned char)shuffle_mask_alpha);
-       //epsilon: second half of second, second half of first, break at 
(unsigned int)target%16
-       vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, 
oneup, (vector unsigned char)shuffle_mask_alpha);
-       //zeta: second half of second, first half of first, break at 16 - 
(unsigned int)target%16
-       vector unsigned int shuffle_mask_zeta = 
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
-       //beta: first half of first, second half of second, break at 
num_bytes%16
-       src_cmp = spu_splats((unsigned char)(num_bytes%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_beta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-
-
-
-
-
-       qword src0_past;
-       qword src0_present;
-       qword src1_past;
-       qword src1_present;
-       qword tgt_past;
-       qword tgt_present;
-
-       qword in_temp0;
-       qword in_temp1;
-       qword out_temp0;
-       qword out_temp1;
-
-    vector signed int sum;
-
-       src0_past = si_lqd((qword)address_counter_src0, 0);
-       src1_past = si_lqd((qword)address_counter_src1, 0);
-       tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
-       for(i = 0; i < num_bytes/16; ++i) {
-
-               src0_present = si_lqd((qword)address_counter_src0, 16);
-               src1_present = si_lqd((qword)address_counter_src1, 16);
-               tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-               in_temp0 = spu_shuffle(src0_present, src0_past, (vector 
unsigned char)shuffle_mask_gamma);
-               in_temp1 = spu_shuffle(src1_present, src1_past, (vector 
unsigned char)shuffle_mask_eta);
-
-               sum = spu_add((vector signed int)in_temp0, (vector signed 
int)in_temp1);
-
-
-               out_temp0 = spu_shuffle(tgt_past, (qword)sum, 
shuffle_mask_delta);
-               out_temp1 = spu_shuffle(tgt_present, (qword)sum, 
shuffle_mask_epsilon);
-
-               si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-               si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-               tgt_past = out_temp1;
-               src0_past = src0_present;
-               src1_past = src1_present;
-               address_counter_src0 = spu_add(address_counter_src0, 16);
-               address_counter_src1 = spu_add(address_counter_src1, 16);
-               address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
-       }
-
-       src0_present = si_lqd((qword)address_counter_src0, 16);
-       src1_present = si_lqd((qword)address_counter_src1, 16);
-       tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
-       in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) 
shuffle_mask_gamma);
-       in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) 
shuffle_mask_eta);
-       sum = spu_add((vector signed int)in_temp0, (vector signed int)in_temp1);
-       qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned 
char) shuffle_mask_zeta);
-       qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned 
char)shuffle_mask_beta);
-
-
-
-       out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
-       out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
-       si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-       si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-       return retval;
-}
-
-
-
-/*
-int main(){
-
-       signed short pooh[48];
-       signed short bear[48];
-       signed short res[48];
-
-       int i = 0;
-       for(i = 0; i < 48; ++i){
-               pooh[i] = i;
-       }
-       for(i = 48; i < 96; ++i){
-               bear[i - 48] = i;
-       }
-
-       vector_sum(&pooh[9], &pooh[9], &bear[3], 30);
-
-       for(i = 0; i < 48; ++i) {
-               printf("%d, ", pooh[i]);
-       }
-       printf("\n");
-}
-*/
-
diff --git a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c 
b/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c
deleted file mode 100644
index d1c9604..0000000
--- a/volk/spu_lib/spu_32fc_pointwise_multiply_unaligned.c
+++ /dev/null
@@ -1,222 +0,0 @@
-#include<spu_intrinsics.h>
-
-
-
-
-void* libvector_pointwise_multiply_32fc_unaligned(void* target,  void* src0, 
void* src1, unsigned int num_bytes){
-       //loop iterator i
-       int i = 0;
-       void* retval = target;
-
-
-       //put the target and source addresses into qwords
-       vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 
0};
-       vector unsigned int address_counter_src0 = {(unsigned int)src0, 0, 0 
,0};
-       vector unsigned int address_counter_src1 = {(unsigned int)src1, 0, 0, 
0};
-
-       //create shuffle masks
-
-       //shuffle mask building blocks:
-       //all from the first vector
-       vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07,
-                                                                 0x08, 0x09, 
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
-       //all from the second vector
-       vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 
0x15, 0x16, 0x17,
-                                                                               
 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
-       //gamma: second half of the second, first half of the first, break at 
(unsigned int)src0%16
-       vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned 
int)src0%16));
-       vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
-       vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
-       vector unsigned char cmp_res = spu_or(gt_res, eq_res);
-       vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
-       vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned 
int)src0%16);
-
-       //eta: second half of the second, first half of the first, break at 
(unsigned int)src1%16
-       src_cmp = spu_splats((unsigned char)((unsigned int)src1%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       sixteen_uchar = spu_splats((unsigned char)16);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_eta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_eta = spu_rlqwbyte(shuffle_mask_eta, (unsigned 
int)src1%16);
-
-
-
-
-
-       vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, 
-((unsigned int)target%16));
-       vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned 
int)target%16));
-
-       //alpha: first half of first, second half of second, break at (unsigned 
int)target%16
-       src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-       //delta: first half of first, first half of second, break at (unsigned 
int)target%16
-       vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, 
tgt_second, (vector unsigned char)shuffle_mask_alpha);
-       //epsilon: second half of second, second half of first, break at 
(unsigned int)target%16
-       vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, 
oneup, (vector unsigned char)shuffle_mask_alpha);
-       //zeta: second half of second, first half of first, break at 16 - 
(unsigned int)target%16
-       vector unsigned int shuffle_mask_zeta = 
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
-       //beta: first half of first, second half of second, break at 
num_bytes%16
-       src_cmp = spu_splats((unsigned char)(num_bytes%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_beta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-
-
-
-
-
-       qword src0_past;
-       qword src0_present;
-       qword src1_past;
-       qword src1_present;
-       qword tgt_past;
-       qword tgt_present;
-
-       qword in_temp0;
-       qword in_temp1;
-       qword out_temp0;
-       qword out_temp1;
-
-
-       src0_past = si_lqd((qword)address_counter_src0, 0);
-       src1_past = si_lqd((qword)address_counter_src1, 0);
-       tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
-       vector unsigned char shuffle_mask_complexprod0 = {0x04, 0x05, 0x06, 
0x07, 0x00, 0x01, 0x02, 0x03,
-                                                                               
                          0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b};
-       vector unsigned char shuffle_mask_complexprod1 = {0x00, 0x01, 0x02, 
0x03, 0x10, 0x11, 0x12, 0x13,
-                                                                               
                          0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b};
-       vector unsigned char shuffle_mask_complexprod2 = {0x04, 0x05, 0x06, 
0x07, 0x14, 0x15, 0x16, 0x17,
-                                                                               
                          0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f};
-       vector unsigned char sign_changer = {0x00, 0x00, 0x00, 0x00, 0x80, 
0x00, 0x00, 0x00,
-                                                                               
 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00};
-
-       vector float prod0;
-       qword shuf0;
-       vector float prod1;
-       vector float sign_change;
-       qword summand0;
-       qword summand1;
-       vector float sum;
-
-
-       for(i = 0; i < num_bytes/16; ++i) {
-
-               src0_present = si_lqd((qword)address_counter_src0, 16);
-               src1_present = si_lqd((qword)address_counter_src1, 16);
-               tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-               in_temp0 = spu_shuffle(src0_present, src0_past, (vector 
unsigned char)shuffle_mask_gamma);
-               in_temp1 = spu_shuffle(src1_present, src1_past, (vector 
unsigned char)shuffle_mask_eta);
-
-               prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1);
-               shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1, 
shuffle_mask_complexprod0);
-               prod1 = spu_mul((vector float)in_temp0, (vector float)shuf0);
-               sign_change = spu_xor(prod0, (vector float)sign_changer);
-
-               summand0 = spu_shuffle((qword)sign_change, (qword)prod1, 
shuffle_mask_complexprod1);
-
-               summand1 = spu_shuffle((qword)sign_change, (qword)prod1, 
shuffle_mask_complexprod2);
-
-               sum = spu_add((vector float)summand0, (vector float)summand1);
-
-
-               out_temp0 = spu_shuffle(tgt_past, (qword)sum, 
shuffle_mask_delta);
-               out_temp1 = spu_shuffle(tgt_present, (qword)sum, 
shuffle_mask_epsilon);
-
-               si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-               si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-               tgt_past = out_temp1;
-               src0_past = src0_present;
-               src1_past = src1_present;
-               address_counter_src0 = spu_add(address_counter_src0, 16);
-               address_counter_src1 = spu_add(address_counter_src1, 16);
-               address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
-       }
-
-       src0_present = si_lqd((qword)address_counter_src0, 16);
-       src1_present = si_lqd((qword)address_counter_src1, 16);
-       tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
-       in_temp0 = spu_shuffle(src0_present, src0_past, (vector unsigned char) 
shuffle_mask_gamma);
-       in_temp1 = spu_shuffle(src1_present, src1_past, (vector unsigned char) 
shuffle_mask_eta);
-
-
-       prod0 = spu_mul((vector float)in_temp0, (vector float)in_temp1);
-       shuf0 = spu_shuffle((qword)in_temp1, (qword)in_temp1, 
shuffle_mask_complexprod0);
-       prod1 = spu_mul(prod0, (vector float)shuf0);
-       sign_change = spu_xor(prod0, (vector float)sign_changer);
-       summand0 = spu_shuffle((qword)sign_change, (qword)prod1, 
shuffle_mask_complexprod1);
-       summand1 = spu_shuffle((qword)sign_change, (qword)prod1, 
shuffle_mask_complexprod2);
-       sum = spu_add((vector float)summand0, (vector float)summand1);
-
-
-
-       qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned 
char) shuffle_mask_zeta);
-       qword meld = spu_shuffle((qword)sum, target_temp, (vector unsigned 
char)shuffle_mask_beta);
-
-
-
-       out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
-       out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
-       si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-       si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-       return retval;
-}
-
-
-
-/*
-int main(){
-
-       float pooh[48];
-       float bear[48];
-       float res[48];
-
-       int i = 0;
-       for(i = 0; i < 48; ++i){
-               pooh[i] = (float) i;
-       }
-       for(i = 48; i < 96; ++i){
-               bear[i - 48] = (float) i;
-       }
-
-       vector_product_complex(res, pooh, bear, 48*sizeof(float));
-
-
-
-       for(i = 0; i < 48; ++i) {
-               printf("%f, ", res[i]);
-       }
-       printf("\n");
-
-
-}
-*/
-
diff --git a/volk/spu_lib/spu_memcpy_unaligned.c 
b/volk/spu_lib/spu_memcpy_unaligned.c
deleted file mode 100644
index 0f15b5d..0000000
--- a/volk/spu_lib/spu_memcpy_unaligned.c
+++ /dev/null
@@ -1,290 +0,0 @@
-#include<libvector/libvector_memcpy_unaligned.h
-#include<spu_intrinsics.h>
-
-void* libvector_memcpy_unaligned(void* target, void* src, unsigned int 
num_bytes){
-       //loop iterator i
-       int i = 0;
-       void* retval = target;
-
-
-       //put the target and source addresses into qwords
-       vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 
0};
-       vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-
-       //create shuffle masks
-
-       //shuffle mask building blocks:
-       //all from the first vector
-       vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07,
-                                                                 0x08, 0x09, 
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
-       //all from the second vector
-       vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 
0x15, 0x16, 0x17,
-                                                                               
 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
-       //gamma: second half of the second, first half of the first, break at 
(unsigned int)src%16
-       vector unsigned char src_cmp = spu_splats((unsigned char)((unsigned 
int)src%16));
-       vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
-       vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
-       vector unsigned char cmp_res = spu_or(gt_res, eq_res);
-       vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
-       vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, (unsigned 
int)src%16);
-
-
-
-
-       vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, 
-((unsigned int)target%16));
-       vector unsigned char tgt_first = spu_rlqwbyte(oneup, -((unsigned 
int)target%16));
-
-       //alpha: first half of first, second half of second, break at (unsigned 
int)target%16
-       src_cmp = spu_splats((unsigned char)((unsigned int)target%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-       //delta: first half of first, first half of second, break at (unsigned 
int)target%16
-       vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, 
tgt_second, (vector unsigned char)shuffle_mask_alpha);
-       //epsilon: second half of second, second half of first, break at 
(unsigned int)target%16
-       vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, 
oneup, (vector unsigned char)shuffle_mask_alpha);
-       //zeta: second half of second, first half of first, break at 16 - 
(unsigned int)target%16
-       vector unsigned int shuffle_mask_zeta = 
spu_rlqwbyte(shuffle_mask_alpha, (unsigned int)target%16);
-
-       //beta: first half of first, second half of second, break at 
num_bytes%16
-       src_cmp = spu_splats((unsigned char)(num_bytes%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_beta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-
-
-
-
-
-       qword src_past;
-       qword src_present;
-       qword tgt_past;
-       qword tgt_present;
-
-       qword in_temp;
-       qword out_temp0;
-       qword out_temp1;
-
-       src_past = si_lqd((qword)address_counter_src, 0);
-       tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
-       for(i = 0; i < num_bytes/16; ++i) {
-
-               src_present = si_lqd((qword)address_counter_src, 16);
-               tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-               in_temp = spu_shuffle(src_present, src_past, (vector unsigned 
char)shuffle_mask_gamma);
-
-               out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
-               out_temp1 = spu_shuffle(tgt_present, in_temp, 
shuffle_mask_epsilon);
-
-               si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-               si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-               tgt_past = out_temp1;
-               src_past = src_present;
-               address_counter_src = spu_add(address_counter_src, 16);
-               address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
-       }
-
-       src_present = si_lqd((qword)address_counter_src, 16);
-       tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
-       in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) 
shuffle_mask_gamma);
-       qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned 
char) shuffle_mask_zeta);
-       qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned 
char)shuffle_mask_beta);
-
-
-
-       out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
-       out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
-       si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-       si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-       return retval;
-}
-
-
-
-/*
-void* mcpy(void* target, void* src, size_t num_bytes){
-       //loop iterator i
-       int i = 0;
-       void* retval = src;
-
-       //put the target and source addresses into qwords
-       vector unsigned int address_counter_tgt = {(unsigned int)target, 0, 0, 
0};
-       vector unsigned int address_counter_src = {(unsigned int)src, 0, 0 ,0};
-
-       //create shuffle masks
-
-       //shuffle mask building blocks:
-       //all from the first vector
-       vector unsigned char oneup = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 
0x07,
-                                                                 0x08, 0x09, 
0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
-       //all from the second vector
-       vector unsigned char second_oneup = {0x10, 0x11, 0x12, 0x13, 0x14, 
0x15, 0x16, 0x17,
-                                                                               
 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
-
-
-
-       //gamma: second half of the second, first half of the first, break at 
src%16
-       vector unsigned char src_cmp = spu_splats((unsigned char)(src%16));
-       vector unsigned char gt_res = spu_cmpgt(oneup, src_cmp);
-       vector unsigned char eq_res = spu_cmpeq(oneup, src_cmp);
-       vector unsigned char cmp_res = spu_or(gt_res, eq_res);
-       vector unsigned char sixteen_uchar = spu_splats((unsigned char)16);
-       vector unsigned char phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_gamma = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                 (vector unsigned int)oneup);
-       shuffle_mask_gamma = spu_rlqwbyte(shuffle_mask_gamma, src%16);
-
-
-
-
-       vector unsigned char tgt_second = spu_rlqwbyte(second_oneup, 
-(target%16));
-       vector unsigned char tgt_first = spu_rlqwbyte(oneup, -(target%16));
-
-       //alpha: first half of first, second half of second, break at target%16
-       src_cmp = spu_splats((unsigned char)(target%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_alpha = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-       //delta: first half of first, first half of second, break at target%16
-       vector unsigned char shuffle_mask_delta = spu_shuffle(oneup, 
tgt_second, (vector unsigned char)shuffle_mask_alpha);
-       //epsilon: second half of second, second half of first, break at 
target%16
-       vector unsigned char shuffle_mask_epsilon = spu_shuffle(tgt_second, 
oneup, (vector unsigned char)shuffle_mask_alpha);
-       //zeta: second half of second, first half of first, break at 16 - 
target%16
-       vector unsigned int shuffle_mask_zeta = 
spu_rlqwbyte(shuffle_mask_alpha, target%16);
-
-       //beta: first half of first, second half of second, break at 
num_bytes%16
-       src_cmp = spu_splats((unsigned char)(num_bytes%16));
-       gt_res = spu_cmpgt(oneup, src_cmp);
-       eq_res = spu_cmpeq(oneup, src_cmp);
-       cmp_res = spu_or(gt_res, eq_res);
-       phase_change = spu_and(sixteen_uchar, cmp_res);
-       vector unsigned int shuffle_mask_beta = spu_add((vector unsigned 
int)phase_change,
-                                                                               
                         (vector unsigned int)oneup);
-
-
-         printf("num_bytesmod16 %d\n", num_bytes%16);
-       printf("beta %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, 
%d, %d\n",
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 0),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 1),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 2),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 3),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 4),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 5),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 6),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 7),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 8),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 9),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 10),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 11),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 12),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 13),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 14),
-                  spu_extract((vector unsigned char) shuffle_mask_beta, 15));
-
-
-
-
-
-
-
-       qword src_past;
-       qword src_present;
-       qword tgt_past;
-       qword tgt_present;
-
-       qword in_temp;
-       qword out_temp0;
-       qword out_temp1;
-
-       src_past = si_lqd((qword)address_counter_src, 0);
-       tgt_past = si_lqd((qword)address_counter_tgt, 0);
-
-       for(i = 0; i < num_bytes/16; ++i) {
-
-               src_present = si_lqd((qword)address_counter_src, 16);
-               tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-               in_temp = spu_shuffle(src_present, src_past, (vector unsigned 
char)shuffle_mask_gamma);
-
-               out_temp0 = spu_shuffle(tgt_past, in_temp, shuffle_mask_delta);
-               out_temp1 = spu_shuffle(tgt_present, in_temp, 
shuffle_mask_epsilon);
-
-               si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-               si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-               tgt_past = out_temp1;
-               src_past = src_present;
-               address_counter_src = spu_add(address_counter_src, 16);
-               address_counter_tgt = spu_add(address_counter_tgt, 16);
-
-
-       }
-
-       src_present = si_lqd((qword)address_counter_src, 16);
-       tgt_present = si_lqd((qword)address_counter_tgt, 16);
-
-
-       in_temp = spu_shuffle(src_present, src_past,(vector unsigned char) 
shuffle_mask_gamma);
-       qword target_temp = spu_shuffle(tgt_present, tgt_past, (vector unsigned 
char) shuffle_mask_zeta);
-       qword meld = spu_shuffle(in_temp, target_temp, (vector unsigned 
char)shuffle_mask_beta);
-
-
-
-       out_temp0 = spu_shuffle(tgt_past, meld, shuffle_mask_delta);
-       out_temp1 = spu_shuffle(tgt_present, meld, shuffle_mask_epsilon);
-
-       si_stqd(out_temp0, (qword)address_counter_tgt, 0);
-       si_stqd(out_temp1, (qword)address_counter_tgt, 16);
-
-       return retval;
-
-}
-*/
-/*
-int main(){
-
-       unsigned char pooh[48];
-       unsigned char bear[48];
-
-       int i = 0;
-       for(i = 0; i < 48; ++i){
-               pooh[i] = i;
-               bear[i] = i;
-       }
-
-       spu_mcpy(&pooh[9],&bear[3], 15);
-
-       for(i = 0; i < 48; ++i) {
-               printf("%d, ", pooh[i]);
-       }
-       printf("\n");
-}
-
-*/
diff --git a/volk/spu_lib/spu_memset_unaligned.S 
b/volk/spu_lib/spu_memset_unaligned.S
deleted file mode 100644
index c260a12..0000000
--- a/volk/spu_lib/spu_memset_unaligned.S
+++ /dev/null
@@ -1,185 +0,0 @@
-/* -*- asm -*- */
-/*
- * Copyright 2008 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-#include "gc_spu_macs.h"
-
-       .file "spu_memset_unaligned.S"
-
-       /*
-        * Computes this, only a lot faster...
-        *
-        *      void *
-        *      libvector_memset_unaligned(void *pv, int c, size_t n)
-        *      {
-        *        unsigned char *p = (unsigned char *) pv;
-        *        size_t i;
-        *        for (i = 0; i < n; i++)
-        *          p[i] = c;
-        *
-        *        return pv;
-        *      }
-        */
-
-#define        p_arg   arg1    // we're going to clobber arg1 w/ the return 
value
-#define        c       arg2    // the constant we're writing
-#define        n       arg3    // how many bytes to write
-
-#define        p       r13     // where we're writing
-#define        t0      r14
-#define t1     r15
-#define        mask    r16
-#define        old     r17
-#define an     r18     // aligned n (n rounded down to mod 16 boundary)
-#define        next_p  r19
-#define        cond1   r20
-#define        cond2   r21
-#define m      r22
-#define r      r23
-
-       PROC_ENTRY(libvector_memset_unaligned)
-
-       // Hint the return from do_head, in case we go that way.
-       // There's pretty much nothing to can do to hint the branch to it.
-       hbrr    do_head_br, head_complete
-
-       MR(p, p_arg)    // leaves p, the return value, in the correct reg (r3)
-       BRZ_RETURN(n)
-
-       MODULO(t0, p, 16)       // is p%16 == 0?
-       VSPLTB(c, c, 3)         // splat byte in preferred slot of c into all 
slots
-       brnz    t0, do_head     // no, handle it
-head_complete:
-
-       /*
-        * preconditions:
-        *   p%16 == 0, n > 0
-        */
-       hbrr    middle_loop_br, middle_loop
-
-       ROUND_DOWN(an, n, 16)   // an is "aligned n"
-       MODULO(n, n, 16)        // what's left over in the last quad
-       brz     an, do_tail     // no whole quad words; skip to tail
-       clgti   t0, an, 127     // an >= 128?
-       brz     t0, middle2     // nope, go handle the cases between 0 and 112
-
-       /*
-        * 128 bytes / iteration
-        */
-       .p2align 4
-middle_loop:
-       ai      an, an, -128
-         stqd  c,  0*16(p)
-       ai      next_p, p, 128
-         stqd  c,  1*16(p)
-       cgti    cond1, an, 127
-         stqd  c,  2*16(p)
-
-         stqd  c,  3*16(p)
-         stqd  c,  4*16(p)
-         stqd  c,  5*16(p)
-         stqd  c,  6*16(p)
-
-       MR(p, next_p)
-         stqd  c,  7*16-128(next_p)
-       or      cond2, n, an
-middle_loop_br:
-         brnz  cond1, middle_loop
-
-       /*
-        * if an and n are both zero, return now
-        */
-       BRZ_RETURN(cond2)
-
-       /*
-        * otherwise handle last of full quad words
-        *
-        *   0 <= an < 128, p%16 == 0
-        */
-middle2:
-       /*
-        * if an == 0, go handle the final non-full quadword
-        */
-       brz     an, do_tail
-       hbrr    middle2_loop_br, middle2_loop
-
-       .p2align 3
-middle2_loop:
-       ai      next_p, p, 16
-         stqd  c, 0(p)
-       ai      an, an, -16
-         LMR(p, next_p)
-middle2_loop_br:
-         brnz  an, middle2_loop
-
-       /* We're done with the full quadwords. */
-
-       /*
-        * Handle the final partial quadword.
-        * We'll be modifying only the left hand portion of the quad.
-        *
-        * preconditions:
-        *   an == 0, 0 <= n < 16, p%16 == 0
-        */
-do_tail:
-       HINT_RETURN(do_tail_ret)
-       il      mask, -1
-       sfi     t1, n, 16               // t1 = 16 - n
-       lqd     old, 0(p)
-       shlqby  mask, mask, t1
-       selb    t0, old, c, mask
-       stqd    t0, 0(p)
-do_tail_ret:
-       RETURN()
-
-       /*
-        * ----------------------------------------------------------------
-        * Handle the first partial quadword
-        *
-        * preconditions:
-        *   p%16 != 0
-        *
-         * postconditions:
-         *   p%16 == 0 or n == 0
-         *
-         *        |-- m --|
-         *     +----------------+----------------+
-         *     |  ////////      |                |
-         *     +----------------+----------------+
-         *        |----- r -----|
-         *        p
-         * ----------------------------------------------------------------
-        */
-do_head:
-       lqd     old, 0(p)
-       MODULO_NEG(r, p, 16)
-       il      mask, -1
-       UMIN(m, r, n)
-       shlqby  mask, mask, m   // 1's in the top, m*8 0's in the bottom
-       MR(t1, p)
-       sf      t0, m, r        // t0 = r - m
-       a       p, p, m         // p += m
-       rotqby  mask, mask, t0  // rotate 0's to the right place
-       sf      n, m, n         // n -= m
-       selb    t0, c, old, mask // merge
-       stqd    t0, 0(t1)
-       BRZ_RETURN(n)
-do_head_br:
-       br      head_complete



reply via email to

[Prev in Thread] Current Thread [Next in Thread]