commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] r8152 - in gnuradio/branches/developers/ngoergen/spe_f


From: ngoergen
Subject: [Commit-gnuradio] r8152 - in gnuradio/branches/developers/ngoergen/spe_fir_fff: . spu
Date: Wed, 2 Apr 2008 08:47:30 -0600 (MDT)

Author: ngoergen
Date: 2008-04-02 08:47:29 -0600 (Wed, 02 Apr 2008)
New Revision: 8152

Added:
   gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_ccc.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_fff.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/fir_ccc_spu.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/fir_fff_spu.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_ccc_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_ccc_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_fff_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_fff_as.h
Removed:
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h
Log:
spe_fir: Emulating gcell directory layout

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c

Added: gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_ccc.cc
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_ccc.cc         
                (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_ccc.cc 
2008-04-02 14:47:29 UTC (rev 8152)
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/*
+octave:1> 
filter([-4.5434-34.323i,2.32-23.6564i,2.43+5.4i,2.32+12.3i,-12.32+13.2i],[1],[1+2i,3+4i,-5+6i,-7-8i,9-10i,-11.11-12.22i,13.33+14.44i,15.55+16.66i,2.32+54.23i,1.534+-23.24j,12.4354+24.65j,43.432+25.453j,234.645-123.23j,-23.23+12.65i,1.54+2.32i,-54.23+5.43i,1+2i,3+4i,-5+6i,-7-8i,9-10i,-11.11-12.22i,13.33+14.44i,15.55+16.66i,2.32+54.23i,1.534+-23.24j,12.4354+24.65j,43.432+25.453j,234.645-123.23j,-23.23+12.65i,1.54+2.32i,-54.23+5.43i]).'(5:end))
+ans =
+
+   -715.122 -   94.118i
+   -733.604 +   86.241i
+    260.634 -  508.940i
+   1248.406 -  887.283i
+   2381.054 -  471.156i
+    582.948 +  449.680i
+   -567.533 -  256.742i
+    344.364 - 1737.180i
+  -5150.720 - 9007.718i
+  -1849.455 - 4283.808i
+    862.728 + 1936.846i
+   1555.503 + 4570.876i
+  -1415.722 + 5625.155i
+    106.444 -  857.966i
+     79.671 -  569.760i
+    447.406 -  331.063i
+   -715.122 -   94.118i
+   -733.604 +   86.241i
+    260.634 -  508.940i
+   1248.406 -  887.283i
+   2381.054 -  471.156i
+    582.948 +  449.680i
+   -567.533 -  256.742i
+    344.364 - 1737.180i
+  -5150.720 - 9007.718i
+  -1849.455 - 4283.808i
+    862.728 + 1936.846i
+   1555.503 + 4570.876i
+
+ *
+*/
+
+int main(unsigned long long spu, unsigned long long argp,
+    unsigned long long envp)
+{
+  // Placeholder: no filter is run and nothing is compared with the reference output listed in the comment above; spu, argp and envp are unused.
+  return 0;
+}


Property changes on: 
gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_ccc.cc
___________________________________________________________________
Name: svn:executable
   + *

Added: gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_fff.cc
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_fff.cc         
                (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_fff.cc 
2008-04-02 14:47:29 UTC (rev 8152)
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/*
+filter([2.43,12.3,2.32,13.2,-12.32],[1],[1,2,3,4,-5,6,-7,-8,9,-10,-11.11,-12.22,13.33,14.44,15.55,16.66,2.32,54.23,1.534,-23.24,12.4354,24.65,43.432,25.453,234.645,-123.23,-23.23,12.65,1.54,2.32,-54.23,5.43]).'(5:end)
+ans =
+
+     58.090
+    -22.680
+     61.030
+   -206.900
+     48.030
+    -98.480
+   -148.477
+     27.812
+   -386.569
+    147.246
+    221.895
+    591.756
+    273.014
+    226.325
+    704.475
+    -86.419
+    435.179
+   -488.927
+    111.918
+   1103.716
+   1156.197
+   2915.350
+  -1226.904
+   2242.850
+  -4412.019
+   1265.485
+    353.503
+   -783.972
+
+*/
+
+int main(unsigned long long spu, unsigned long long argp,
+    unsigned long long envp)
+{
+  // Placeholder: no filter is run and nothing is compared with the reference output listed in the comment above; spu, argp and envp are unused.
+  return 0;
+}


Property changes on: 
gnuradio/branches/developers/ngoergen/spe_fir_fff/qa_spu_fir_fff.cc
___________________________________________________________________
Name: svn:executable
   + *

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/fir_ccc_spu.cc 
(from rev 8118, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/fir_ccc_spu.cc        
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/fir_ccc_spu.cc        
2008-04-02 14:47:29 UTC (rev 8152)
@@ -0,0 +1,76 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "gr_spu_dma_lock.h"
+#include "spu_fir_fff_params.h"
+#include "spu_fir_ccc_as.h"
+#include <spu_mfcio.h>
+
+#define MAX_BUFSIZE (128*100)  // element count (floats) of each of the three buffers below
+
+//float inputs[MAX_BUFSIZE]  __attribute__((aligned(16))) = {1,  2,  1,  2, 1, 
 2, 1,  2, 1,  2, 1,  2, 1,  2, 1,  2};
+//float taps[MAX_BUFSIZE]  __attribute__((aligned(16))) = {2.32, -23.6563, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0};
+float inputs[MAX_BUFSIZE]  __attribute__((aligned(16)));  // local copy of the data at ea_in1 (see main)
+float taps[MAX_BUFSIZE]  __attribute__((aligned(16)));  // local copy of the data at ea_in2 (see main)
+float outputs[MAX_BUFSIZE] __attribute__((aligned(16)));  // filter results, paired with ea_out (see main)
+
+/*
+ * SPU-side entry point for the complex (ccc) FIR kernel.
+ *
+ * argp is handed to gr_spu_dma_lock_inout as the address of a
+ * spu_fir_fff_params_t.  That block supplies the addresses paired with the
+ * input, tap and output buffers (ea_in1, ea_in2, ea_out), their length in
+ * floats (size) and the offset / nsamples / ntaps values forwarded to
+ * spu_fir_ccc().  spu and envp are not used.
+ *
+ * Returns 0 after running the filter, or 1 without touching the data
+ * buffers when size does not fit the MAX_BUFSIZE-float local arrays.
+ */
+int main(unsigned long long spu, unsigned long long argp,
+    unsigned long long envp)
+{
+  int tag = 1;
+  spu_fir_fff_params_t spu_fir_fff_params __attribute__((aligned(16)));
+
+  {
+    // NOTE: spu_fir_fff_params_t can be an _IN only param,
+    // it is sent back now for the time value only.
+    // NOTE(review): the code below reads the fields right after this
+    // declaration, so the lock presumably completes the fetch in its
+    // constructor -- confirm in gr_spu_dma_lock.h.
+    gr_spu_dma_lock_inout<spu_fir_fff_params_t> argp_lock(argp,
+        &spu_fir_fff_params, sizeof(spu_fir_fff_params_t), tag);
+
+    // Each transfer below moves size * sizeof(float) bytes through a
+    // MAX_BUFSIZE-float array; refuse a request that would run past the end
+    // of those arrays.  The cast also rejects a negative size should the
+    // field be signed.
+    if (static_cast<unsigned long long>(spu_fir_fff_params.size)
+        > static_cast<unsigned long long>(MAX_BUFSIZE))
+      return 1;
+
+    gr_spu_dma_lock_in<__vector float> inputs_lock(spu_fir_fff_params.ea_in1,
+        reinterpret_cast<__vector float *>(&inputs), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    gr_spu_dma_lock_in<__vector float> taps_lock(spu_fir_fff_params.ea_in2,
+        reinterpret_cast<__vector float *>(&taps), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    gr_spu_dma_lock_out<__vector float> outputs_lock(spu_fir_fff_params.ea_out,
+        reinterpret_cast<__vector float *>(&outputs), spu_fir_fff_params.size
+            * sizeof(float), tag);
+
+    // Time the kernel: load the decrementer with all ones and sample it
+    // before and after the call.
+    spu_write_decrementer(~0);
+    int start(spu_read_decrementer());
+    
+    spu_fir_ccc( reinterpret_cast<__vector float *>(&inputs),
+        reinterpret_cast<__vector float *>(&taps),
+        reinterpret_cast<__vector float *>(&outputs), 0,
+        spu_fir_fff_params.offset, spu_fir_fff_params.nsamples,
+        spu_fir_fff_params.ntaps);
+    
+    // start minus the later reading; stored in pad[0] so it travels back
+    // with the parameter block.
+    spu_fir_fff_params.pad[0] = start - spu_read_decrementer();
+
+  }
+
+  return 0;
+}

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/fir_fff_spu.cc 
(from rev 8118, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/fir_fff_spu.cc        
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/fir_fff_spu.cc        
2008-04-02 14:47:29 UTC (rev 8152)
@@ -0,0 +1,70 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "gr_spu_dma_lock.h"
+#include "spu_fir_fff_params.h"
+#include "spu_fir_fff_as.h"
+#include <spu_mfcio.h>
+
+#define MAX_BUFSIZE (128*100)  // element count (floats) of each of the three buffers below
+
+float inputs[MAX_BUFSIZE]  __attribute__((aligned(16)));  // local copy of the data at ea_in1 (see main)
+float taps[MAX_BUFSIZE]  __attribute__((aligned(16)));  // local copy of the data at ea_in2 (see main)
+float outputs[MAX_BUFSIZE] __attribute__((aligned(16)));  // filter results, paired with ea_out (see main)
+
+/*
+ * SPU-side entry point for the real-valued (fff) FIR kernel.
+ *
+ * argp is handed to gr_spu_dma_lock_inout as the address of a
+ * spu_fir_fff_params_t.  That block supplies the addresses paired with the
+ * input, tap and output buffers (ea_in1, ea_in2, ea_out), their length in
+ * floats (size) and the offset / nsamples / ntaps values forwarded to
+ * spu_fir_fff().  spu and envp are not used.
+ *
+ * Returns 0 after running the filter, or 1 without touching the data
+ * buffers when size does not fit the MAX_BUFSIZE-float local arrays.
+ */
+int main(unsigned long long spu, unsigned long long argp,
+    unsigned long long envp)
+{
+  int tag = 1;
+  spu_fir_fff_params_t spu_fir_fff_params __attribute__((aligned(16)));
+
+  {
+    // NOTE: spu_fir_fff_params_t can be an _IN only param,
+    // it is sent back now for the time value only.
+    // NOTE(review): the code below reads the fields right after this
+    // declaration, so the lock presumably completes the fetch in its
+    // constructor -- confirm in gr_spu_dma_lock.h.
+    gr_spu_dma_lock_inout<spu_fir_fff_params_t> argp_lock(argp,
+        &spu_fir_fff_params, sizeof(spu_fir_fff_params_t), tag);
+
+    // Each transfer below moves size * sizeof(float) bytes through a
+    // MAX_BUFSIZE-float array; refuse a request that would run past the end
+    // of those arrays.  The cast also rejects a negative size should the
+    // field be signed.
+    if (static_cast<unsigned long long>(spu_fir_fff_params.size)
+        > static_cast<unsigned long long>(MAX_BUFSIZE))
+      return 1;
+
+    gr_spu_dma_lock_in<__vector float> inputs_lock(spu_fir_fff_params.ea_in1,
+        reinterpret_cast<__vector float *>(&inputs), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    gr_spu_dma_lock_in<__vector float> taps_lock(spu_fir_fff_params.ea_in2,
+        reinterpret_cast<__vector float *>(&taps), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    gr_spu_dma_lock_out<__vector float> outputs_lock(spu_fir_fff_params.ea_out,
+        reinterpret_cast<__vector float *>(&outputs), spu_fir_fff_params.size
+            * sizeof(float), tag);
+
+    // Time the kernel: load the decrementer with all ones and sample it
+    // before and after the call.
+    spu_write_decrementer(~0);
+    int start(spu_read_decrementer());
+    
+    spu_fir_fff( reinterpret_cast<__vector float *>(&inputs),
+        reinterpret_cast<__vector float *>(&taps),
+        reinterpret_cast<__vector float *>(&outputs), 0,
+        spu_fir_fff_params.offset, spu_fir_fff_params.nsamples,
+        spu_fir_fff_params.ntaps);
+    
+    // start minus the later reading; stored in pad[0] so it travels back
+    // with the parameter block.
+    spu_fir_fff_params.pad[0] = start - spu_read_decrementer();
+  
+  }
+
+  return 0;
+}

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_ccc_as.S 
(from rev 8118, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_ccc_as.S      
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_ccc_as.S      
2008-04-02 14:47:29 UTC (rev 8152)
@@ -0,0 +1,277 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+
+# taps are guaranteed to be 16-byte aligned.
+# ntaps != 0
+
+#  void spu_fir_ccc (  
+#      const float *input,
+#      const float *taps, 
+#      float *output,
+#      const float *delayline,
+#      const unsigned int offset,
+#      const unsigned int nsamples,
+#      const unsigned int ntaps
+#  )
+#  {
+#    unsigned int o_index = 0;
+#
+#    do {  
+#      float sum0 = 0;
+#      float sum1 = 0;
+#      float sum2 = 0;
+#      float sum3 = 0;
+#
+#         unsigned int n_2_complex_blocks = ntaps / 2;
+#
+#         unsigned int cur_tap = 0;
+#      unsigned int cur_input = input + offset + o_index;
+#
+#      do {
+#  
+#        sum0 += cur_input[0] * cur_tap[0] - cur_input[1] * cur_tap[1];
+#        sum1 += cur_input[0] * cur_tap[1] + cur_input[1] * cur_tap[0];
+#        sum2 += cur_input[2] * cur_tap[2] - cur_input[3] * cur_tap[3];
+#        sum3 += cur_input[2] * cur_tap[3] + cur_input[3] * cur_tap[2];
+#  
+#        cur_input += 4;
+#        cur_tap += 4;
+#  
+#      } while ((n_2_complex_blocks -= 2) != 0);
+#
+#              output[o_index] = sum0 + sum2;
+#              output[o_index+1] = sum1 + sum3;
+#
+#    } while ((o_index += 2) != nsamples * 2);
+#  
+#  }
+#      
+
+       .file   "fir_ccc_spu.S"
+.text
+       .align  3
+       .global spu_fir_ccc
+       .type   spu_fir_ccc, @function
+spu_fir_ccc:
+                               ori     $32,$5,0        # 0-2 $32 = address used by the stqd at .spu_fir_ccc_finish4; $5 itself is cleared and reused as an accumulator below
+                               lqa $41, spu_fir_ccc_hightapmask        # 1-6 
expand masks for taps
+                               ori     $33,$7,0        # 0-2 $33 = byte offset added to the input pointer; stepped by 8 per output
        
+                               lqa $42, spu_fir_ccc_lowtapmask # 1-6 expand 
masks for taps
+                               ori     $31,$8,0        # 0-2 $31 = number of outputs still to produce
        
+                               lqa $43, spu_fir_ccc_xormask    # 1-6 mask for 
inverse of bd
+                               nop
+                               lqa $44, spu_fir_ccc_leftexpand # 1-6 mask to 
shift bd bc to prefered slot
+                       .spu_fir_ccc_start2: 
+                               xor $30,$30,$30         # 0-2 initialize the 
current output vector                               
+                               il  $34, 16             # 0-2 shift mask for 
output insertion
+                       
+                       .spu_fir_ccc_start1: 
+                               andi    $37,$33,0x0c     # 0-2 find index into 
masks [0-4]
+                               xor     $5,$5,$5         # 0-2 clear accumulator
+                               shlqbii $38,$37,2        # 1-4 mult by 4
+                               xor     $6,$6,$6         # 0-2 clear accumulator
+                               xor     $7,$7,$7         # 0-2 clear accumulator
+                               ori     $36,$9,0         # 0-2 $36 = inner-loop counter, reloaded from $9 for every output
+                               xor     $8,$8,$8         # 0-2 clear accumulator
+                               lqd     $39,spu_fir_ccc_highshiftmasks($38)     
 # 1-6 load the right shift mask                        
+                               a       $35, $33, $3     # 0-2 $35 = $3 plus the byte offset in $33 (input read address)
+                               lqd     $40,spu_fir_ccc_lowshiftmasks($38)      
 # 1-6 load the right shift mask
+                               ori     $38,$4,0         # 0-2 $38 = tap read address, restarted from $4 for every output
+               
+                               hbra    .spu_fir_ccc_inner_loop_branch, 
.spu_fir_ccc_inner_loop         # inner-loop hint
+                               
+                               lqd     $10,0($35)       # 1-6
+                               lqd     $11,16($35)      # 1-6
+                               nop
+                               lqd     $12,32($35)      # 1-6
+                               
+                       .spu_fir_ccc_inner_loop:                
+                               ai      $36,$36,-4       # 0-2 counter drops by 4 per pass
+                               lqd     $14,0($38)       # 1-6
+                               ai      $35,$35,32       # 0-2 advance input address by 32 bytes
+                               lqd     $15,16($38)      # 1-6
+                               shufb   $13,$10,$11,$40         # 1-4
+                               shufb   $16,$14,$14,$42         # 1-4
+                               ai      $38,$38,32       # 0-2 advance tap address by 32 bytes
+
+
+                               shufb   $10,$10,$11,$39         # 1-4
+                               nop
+                               # expensive, but needed
+                               shufb   $14,$14,$14,$41         # 1-4
+                               fma     $6, $13, $16, $6        # 0-6
+                               shufb   $18,$11,$12,$40         # 1-4
+                               fma     $5, $10, $14, $5        # 0-6
+
+                               shufb   $17,$15,$15,$42         # 1-4
+                               lqd     $10,0($35)       # 1-6
+                               shufb   $15,$15,$15,$41         # 1-4
+                               nop
+                               shufb   $11,$11,$12,$39         # 1-4   
+                               fma     $8, $18, $17, $8        # 0-6
+                       
+                               lqd     $12,32($35)      # 1-6
+                               fma     $7, $11, $15, $7        # 0-6
+                               
+                               lqd     $11,16($35)      # 1-6
+
+
+                       .spu_fir_ccc_inner_loop_branch: 
+                               brnz    $36,.spu_fir_ccc_inner_loop
+                               
+                               fsmbi   $10,0xFF00       # 1-4 byte mask with 0xFF in bytes 0-7 (words 0 and 1)
+                               fa      $18,$5,$6                # 0-6
+                               hbra    .outter_loop_branch, 
.spu_fir_ccc_start1         # 1-
+                               fa      $19,$7,$8                # 0-6
+                               hbra    .spu_fir_ccc_finish_branch, 
.spu_fir_ccc_finish_branch_targ      # 1-
+                               fa      $5,$18,$19               # 0-6          
                
+
+                               shufb   $6, $5, $5, $44 # 1-4 expand 5 to 6
+                               xor     $6,$6,$43        # 0-2 flip the sign bit of word 0 (see spu_fir_ccc_xormask)
+                               fa      $11, $5, $6  # 0-6      
+
+                               and     $12,$10,$11      # 0-2 keep words 0 and 1 only
+
+                               rotqby  $11, $12, $34    # 1-4 rotate by $34 bytes: 16 leaves the pair in words 0-1, 8 moves it to words 2-3
+                               or      $30,$11,$30      # 0-2 merge into the output vector being assembled
+                               ai      $31,$31,-1       # 0-2 one fewer output to produce
+
+                       .spu_fir_ccc_finish_branch:     
+                               brz     $31,.spu_fir_ccc_finish4         
+                       .spu_fir_ccc_finish_branch_targ:        
+
+                               
+                               ai      $33,$33,8        # 0-2 step the input byte offset by 8
+                               ai      $34,$34,-8       # 0-2 next result goes in the other half of $30
+                       
+                       .outter_loop_branch:            
+                               brnz    $34, .spu_fir_ccc_start1
+                               hbra    .spu_fir_ccc_outter_outter_loop_branch, 
.spu_fir_ccc_start2     
+# Stores the assembled output vector ($30) at the address held in $32
+                       .spu_fir_ccc_finish4:   
+                               stqd    $30,0($32)              
+                               ai      $32,$32,16                              
# increment output pointer by 1 new vector.
+
+                       .spu_fir_ccc_outter_outter_loop_branch: 
+                               brnz    $31,.spu_fir_ccc_start2                 
# start another output vector if needed
+
+                               bi      $lr
+       .size   spu_fir_ccc, .-spu_fir_ccc
+       
+.text
+        .global spu_fir_ccc_highshiftmasks
+        .align  4
+        .type   spu_fir_ccc_highshiftmasks, @object
+        .size   spu_fir_ccc_highshiftmasks, 64
+       
+spu_fir_ccc_highshiftmasks:    # four 16-byte shufb patterns; pattern k selects words k, k, k+1, k+1 of the two source quadwords
+       .long 0x00010203
+       .long 0x00010203
+       .long 0x04050607
+       .long 0x04050607
+
+       .long 0x04050607
+       .long 0x04050607
+       .long 0x08090a0b
+       .long 0x08090a0b
+
+       .long 0x08090a0b
+       .long 0x08090a0b
+       .long 0x0c0d0e0f
+       .long 0x0c0d0e0f
+
+       .long 0x0c0d0e0f
+       .long 0x0c0d0e0f
+       .long 0x10111213
+       .long 0x10111213
+
+        .global spu_fir_ccc_lowshiftmasks
+        .align  4
+        .type   spu_fir_ccc_lowshiftmasks, @object
+        .size   spu_fir_ccc_lowshiftmasks, 64
+        
+spu_fir_ccc_lowshiftmasks:     # four 16-byte shufb patterns; pattern k selects words k+2, k+2, k+3, k+3 of the two source quadwords
+       .long 0x08090a0b
+       .long 0x08090a0b
+       .long 0x0c0d0e0f
+       .long 0x0c0d0e0f
+
+       .long 0x0c0d0e0f
+       .long 0x0c0d0e0f
+       .long 0x10111213
+       .long 0x10111213
+
+       .long 0x10111213
+       .long 0x10111213
+       .long 0x14151617
+       .long 0x14151617
+
+       .long 0x14151617
+       .long 0x14151617
+       .long 0x18191a1b
+       .long 0x18191a1b
+
+        .global spu_fir_ccc_hightapmask
+        .align  4
+        .type   spu_fir_ccc_hightapmask, @object
+        .size   spu_fir_ccc_hightapmask, 16
+        
+spu_fir_ccc_hightapmask:       # shufb pattern selecting source words 0, 1, 1, 0
+       .long 0x00010203
+       .long 0x04050607
+       .long 0x04050607
+       .long 0x00010203
+
+        .global spu_fir_ccc_lowtapmask
+        .align  4
+        .type   spu_fir_ccc_lowtapmask, @object
+        .size   spu_fir_ccc_lowtapmask, 16
+        
+spu_fir_ccc_lowtapmask:        # shufb pattern selecting source words 2, 3, 3, 2
+       .long 0x08090a0b
+       .long 0x0c0d0e0f
+       .long 0x0c0d0e0f
+       .long 0x08090a0b
+
+        .global spu_fir_ccc_xormask
+        .align  4
+        .type   spu_fir_ccc_xormask, @object
+        .size   spu_fir_ccc_xormask, 16
+        
+spu_fir_ccc_xormask:           # only the top bit of word 0 is set; xor with it flips that word's sign bit
+       .long 0x80000000
+       .long 0x00000000
+       .long 0x00000000
+       .long 0x00000000
+       
+        .global spu_fir_ccc_leftexpand
+        .align  4
+        .type   spu_fir_ccc_leftexpand, @object
+        .size   spu_fir_ccc_leftexpand, 16
+        
+spu_fir_ccc_leftexpand:        # shufb pattern moving source words 2, 3 into words 0, 1; the 0x80 selector bytes produce zero bytes
+       .long 0x08090a0b
+       .long 0x0c0d0e0f
+       .long 0x80808080
+       .long 0x80808080                
+       
+       .ident  "Hand coded Cell SPU assembly"

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_ccc_as.h 
(from rev 8118, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_ccc_as.h      
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_ccc_as.h      
2008-04-02 14:47:29 UTC (rev 8152)
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_CCC_AS_H_
+#define SPU_FIR_CCC_AS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*!
+ * \brief compute an array of N interleaved complex output values from 
+ * interleaved complex inputs and taps.
+ *
+ * \param input must have (nsamples - 1 + ntaps()) valid entries.  input does 
+ * NOT need to be aligned. input[0] .. input[nsamples - 1 + ntaps() - 1] are 
+ * referenced to compute the output values.
+ * 
+ * \param taps a pointer to the pre-reversed and aligned set of taps.  Taps 
+ * must be a multiple of 4 and padded accordingly.
+ * 
+ * \param output a pointer to the filter output buffer
+ * 
+ * \param delayline NOTE: currently not used
+ * 
+ * \param offset the initial offset into input buffer to start the filter. If
+ * input is aligned, this should be zero.  If input is not aligned, this is
+ * the difference in alignment.
+ * 
+ * \param nsamples number of samples to produce for output
+ * 
+ * \param ntaps length of the tap vector.  Must be a multiple of 4. 
+ *
+ * NOTE(review): input, taps, output and delayline are declared below as
+ * qword values, while this comment describes them as pointers and the
+ * caller in fir_ccc_spu.cc passes __vector float * arguments -- confirm
+ * the intended parameter types.
+ */
+
+extern void spu_fir_ccc(const qword input,
+    const qword taps, qword output,
+    const qword delayline, const unsigned int offset,
+    const unsigned int nsamples, const unsigned int ntaps);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPU_FIR_CCC_AS_H_

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_fff_as.S 
(from rev 8118, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_fff_as.S      
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_fff_as.S      
2008-04-02 14:47:29 UTC (rev 8152)
@@ -0,0 +1,197 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+
+# taps are guaranteed to be 16-byte aligned.
+# ntaps != 0
+
+#  void spu_fir_fff (  
+#      const __vector float *input,
+#      const __vector float *taps, 
+#      __vector float *output,
+#      const __vector float *delayline,
+#      const unsigned int offset,
+#      const unsigned int nsamples,
+#      const unsigned int ntaps
+#  )
+#  {
+#    unsigned int o_index = 0;
+#
+#    do {  
+#      float sum0 = 0;
+#      float sum1 = 0;
+#      float sum2 = 0;
+#      float sum3 = 0;
+#
+#         unsigned int n_4_float_blocks = ntaps / 4;
+#
+#         unsigned int cur_tap = 0;
+#      unsigned int cur_input = input + offset + o_index;
+#
+#      do {
+#  
+#        sum0 += cur_input[0] * cur_tap[0];
+#        sum1 += cur_input[1] * cur_tap[1];
+#        sum2 += cur_input[2] * cur_tap[2];
+#        sum3 += cur_input[3] * cur_tap[3];
+#  
+#        cur_input += 4;
+#        cur_tap += 4;
+#  
+#      } while (--n_4_float_blocks != 0);
+#
+#              output[o_index] = sum0 + sum1 + sum2 + sum3;
+#
+#    } while (++o_index != nsamples);
+#  
+#  }
+#      
+
+       .file   "fir_fff_spu.S"
+.text
+       .align  3
+       .global spu_fir_fff
+       .type   spu_fir_fff, @function
+spu_fir_fff:                   # FIR kernel entry. Registers as used below: $3 input, $4 taps, $5 output, $7 offset (bytes), $8 nsamples, $9 ntaps; $6 is cleared without ever being read.
+                               ori     $32,$5,0        # 0-2 $32 = output store pointer
+                               ori     $33,$7,0        # 0-2 $33 = running byte offset into input (+4 per output sample)
+                               ori     $31,$8,0        # 0-2 $31 = output samples still to produce
+                       .spu_fir_fff_start4:            # begin a new output quadword (holds up to 4 samples)
+                               xor $30,$30,$30         # 0-2 initialize the 
current output vector                               
+                               il  $34, 16             # 0-2 rotate count (16,12,8,4 bytes) for 
output insertion
+                       
+                       .spu_fir_fff_start1:            # per-output-sample setup
+                               andi    $37,$33,0x0c     # 0-2 find index into 
masks [0-4]
+                               xor     $5,$5,$5         # 0-2 clear accumulator 0
+                               xor     $6,$6,$6         # 0-2 clear accumulator 1
+                                       shlqbii $38,$37,2        # 1-4 mult by 4 -> byte offset of the 16-byte mask (0,16,32,48)
+                               xor     $7,$7,$7         # 0-2 clear accumulator 2
+                               ori     $36,$9,0         # 0-2 $36 = taps remaining for this sample
+                       
+                               xor     $8,$8,$8         # 0-2 clear accumulator 3
+                                       lqd     $39,spu_fir_fff_shiftmasks($38) 
 # 1-6 load the right shift mask
+                               a       $35, $33, $3     # 0-2 $35 = input + byte offset (input read cursor)
+                               ori     $38,$4,0         # 0-2 $38 = taps read cursor (register reused after the mask load)
+               
+                               hbra    .spu_fir_fff_inner_loop_branch, 
.spu_fir_fff_inner_loop         # inner-loop hint       
+
+                                       lqd     $10,0($35)       # 1-6 preload the first three input quadwords
+                                       lqd     $11,16($35)      # 1-6
+                                       lqd     $12,32($35)      # 1-6
+                       .spu_fir_fff_inner_loop:                # each pass consumes 16 taps (4 quadwords of taps, 5 of input)
+
+                                       lqd     $13,48($35)      # 1-6          
                
+                                       lqd     $18,64($35)      # 1-6 fifth input quadword, only used to realign $13
+                                       lqd     $14,0($38)       # 1-6 taps quadword 0
+                               nop                              # NOTE(review): presumably scheduling filler for dual-issue pairing
+                                       lqd     $15,16($38)      # 1-6 taps quadword 1
+
+                               ai      $36,$36,-16      # 0-2 16 taps accounted for
+                                       shufb   $10,$10,$11,$39         # 1-4 realign: 16-byte window across two neighbouring quadwords
+                                       lqd     $16,32($38)      # 1-6 taps quadword 2
+                               nop
+                               ai      $35,$35,64       # 0-2 advance input cursor by 4 quadwords
+                                       shufb   $11,$11,$12,$39         # 1-4
+                                       lqd     $17,48($38)      # 1-6 taps quadword 3
+                                       shufb   $12,$12,$13,$39         # 1-4
+                               ai      $38,$38,64       # 0-2 advance taps cursor by 4 quadwords
+
+                                       shufb   $13,$13,$18,$39         # 1-4
+                               fma     $5, $10, $14, $5        # 0-6 acc0 += realigned input * taps
+                               nop                             
+                               fma     $6, $11, $15, $6        # 0-6
+                                       lqd     $10,0($35)       # 1-6 preload for the next pass (cursor already advanced)
+                               fma     $7, $12, $16, $7        # 0-6           
        
+                                       lqd     $11,16($35)      # 1-6
+                               fma     $8, $13, $17, $8        # 0-6
+                                       lqd     $12,32($35)      # 1-6
+                               
+                               
+                               
+                       .spu_fir_fff_inner_loop_branch: 
+                               brnz    $36,.spu_fir_fff_inner_loop     # repeat while taps remain (tested after the body, so ntaps must be a non-zero multiple of 16)
+                                       fsmbi   $10,0xF000       # 1-4 byte mask: all ones in word 0, zero in words 1-3
+                               fa      $18,$5,$6                # 0-6 fold the four accumulators into one vector
+                                       hbra    
.spu_fir_fff_outter_loop_branch, .spu_fir_fff_start1     # 1-
+                               fa      $19,$7,$8                # 0-6
+                                       hbra    .spu_fir_fff_finish_branch, 
.spu_fir_fff_finish_branch_targ      # 1-
+                               fa      $5,$18,$19               # 0-6
+
+# accumulate word elements in r5 into first element in r5      
+                               ori     $6,$5,0          # 0-2          
+                                       shlqbyi $6,$6,4          # 1-4 shift the copy left one word
+                               ai      $31,$31,-1       # 0-2 one more output sample accounted for
+                               fa      $5,$6,$5         # 0-2
+                                       shlqbyi $6,$6,4          # 1-4
+                               fa      $5,$6,$5         # 0-2
+                                       shlqbyi $6,$6,4          # 1-4
+                               fa      $5,$6,$5         # 0-2 word 0 of $5 now holds the sum of all four words
+                               and     $11,$10,$5       # 0-2 keep only word 0 (the finished sample)
+                                       rotqby  $12, $11, $34    # 1-4 rotate left by $34 bytes: counts 16,12,8,4 place it in word 0,1,2,3
+                               or      $30,$12,$30      # 0-2 merge the sample into the output vector
+                       
+                               ai      $34,$34,-4       # 0-2 next sample goes one word further along
+                       .spu_fir_fff_finish_branch:     
+                               brz     $31,.spu_fir_fff_finish4         # no samples left: store the (possibly partial) vector
+                       .spu_fir_fff_finish_branch_targ:        
+
+                               ai      $33,$33,4        # 0-2 advance the input offset by one 4-byte sample
+
+                       .spu_fir_fff_outter_loop_branch:                
+                               brnz    $34, .spu_fir_fff_start1        # vector not full yet: compute the next sample
+                               hbra    .spu_fir_fff_outter_outter_loop_branch, 
.spu_fir_fff_start4     
+# Stores the assembled output vector ($30) at the output pointer ($32)
+                       .spu_fir_fff_finish4:   
+                               stqd    $30,0($32)              
+                               ai      $32,$32,16                              
# increment output pointer by 1 new vector.
+
+                       .spu_fir_fff_outter_outter_loop_branch: 
+                               brnz    $31,.spu_fir_fff_start4                 
# start another output vector if needed
+
+       bi      $lr     # return to the caller
+       .size   spu_fir_fff, .-spu_fir_fff
+.text                          # the table is emitted into the text section
+        .global spu_fir_fff_shiftmasks
+        .align  4              # NOTE(review): presumably 2^4 = 16-byte alignment, which the quadword load of each mask relies on -- confirm for this assembler
+        .type   spu_fir_fff_shiftmasks, @object
+        .size   spu_fir_fff_shiftmasks, 64     # 4 masks x 16 bytes
+spu_fir_fff_shiftmasks:        # shufb control patterns: bytes 0x00-0x0f select from the first source, 0x10-0x1f from the second
+       .long 0x00010203        # mask 0 (offset 0): bytes 0..15, i.e. the first source unchanged
+       .long 0x04050607
+       .long 0x08090a0b
+       .long 0x0c0d0e0f
+
+       .long 0x04050607        # mask 1 (offset 4): bytes 4..19, window shifted by one word
+       .long 0x08090a0b
+       .long 0x0c0d0e0f
+       .long 0x10111213
+
+       .long 0x08090a0b        # mask 2 (offset 8): bytes 8..23, window shifted by two words
+       .long 0x0c0d0e0f
+       .long 0x10111213
+       .long 0x14151617
+
+       .long 0x0c0d0e0f        # mask 3 (offset 12): bytes 12..27, window shifted by three words
+       .long 0x10111213
+       .long 0x14151617
+       .long 0x18191a1b
+
+       .ident  "Hand coded Cell SPU assembly"

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_fff_as.h 
(from rev 8118, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_fff_as.h      
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu/spu_fir_fff_as.h      
2008-04-02 14:47:29 UTC (rev 8152)
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_FFF_AS_H_
+#define SPU_FIR_FFF_AS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*!
+ * \brief Compute nsamples single-precision FIR filter output values.
+ *
+ * \param input must have (nsamples - 1 + ntaps()) valid entries.  input does 
+ * NOT need to be aligned. input[0] .. input[nsamples - 1 + ntaps() - 1] are 
+ * referenced to compute the output values.
+ * NOTE(review): the assembly implementation loads whole 16-byte quadwords and
+ * preloads ahead of the taps, so memory just past this range is also read.
+ * 
+ * \param taps a pointer to the pre-reversed and aligned set of taps.  Taps 
+ * must be a multiple of 16 and padded accordingly.
+ * 
+ * \param output a pointer to the filter output buffer.  The assembly
+ * implementation writes results as whole 16-byte vectors (four floats each),
+ * so the buffer should be 16-byte aligned and sized for nsamples rounded up
+ * to a multiple of 4; unused lanes of the last vector are written as zero.
+ * 
+ * \param delayline NOTE: currently not used (the assembly never reads it)
+ * 
+ * \param offset the initial offset into input buffer to start the filter. If
+ * input is aligned, this should be zero.  If input is not aligned, this is
+ * the difference in alignment.  The assembly implementation adds this value
+ * to the input address as a byte count.
+ * 
+ * \param nsamples number of samples to produce for output.  Must be at least
+ * 1: the assembly implementation decrements the count before testing it.
+ * 
+ * \param ntaps length of the tap vector.  Must be a non-zero multiple of 16
+ * (the assembly implementation consumes 16 taps per loop pass and tests the
+ * remaining count only after each pass).
+ */
+
+extern void spu_fir_fff(const qword input,
+    const qword taps, qword output,
+    const qword delayline, const unsigned int offset,
+    const unsigned int nsamples, const unsigned int ntaps);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPU_FIR_FFF_AS_H_

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h





reply via email to

[Prev in Thread] Current Thread [Next in Thread]