[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] [gnuradio] 22/28: filter: added 32f decimating fir + tweaks
From: |
git |
Subject: |
[Commit-gnuradio] [gnuradio] 22/28: filter: added 32f decimating fir + tweaks |
Date: |
Mon, 15 Aug 2016 00:47:07 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script.
nwest pushed a commit to annotated tag gr_basic_work
in repository gnuradio.
commit 5f01af5ad325753d5266970cb89322c3a75d553f
Author: Josh Blum <address@hidden>
Date: Sun Nov 13 22:56:45 2011 -0800
filter: added 32f decimating fir + tweaks
---
gr-filter/grc/filter_decim_fir.xml | 5 +
gr-filter/include/CMakeLists.txt | 1 +
gr-filter/include/gr_filter_decim_fir.h | 7 +-
.../filter_swig.i => include/gr_filter_types.h} | 19 ++--
gr-filter/lib/gr_filter_decim_fir.cc | 94 ++++++++++++-----
gr-filter/swig/filter_swig.i | 1 +
volk/include/volk/volk_32fc_x2_dot_prod_32fc_u.h | 116 +++++++++++++++++++++
7 files changed, 201 insertions(+), 42 deletions(-)
diff --git a/gr-filter/grc/filter_decim_fir.xml b/gr-filter/grc/filter_decim_fir.xml
index 8154027..50034d1 100644
--- a/gr-filter/grc/filter_decim_fir.xml
+++ b/gr-filter/grc/filter_decim_fir.xml
@@ -21,6 +21,11 @@ self.$(id).set_taps($taps)</make>
<key>FILTER_FC32_IO_FC32_TAPS</key>
<opt>input:fc32</opt><opt>output:fc32</opt>
</option>
+ <option>
+ <name>F32 IO, F32 Taps</name>
+ <key>FILTER_F32_IO_F32_TAPS</key>
+ <opt>input:f32</opt><opt>output:f32</opt>
+ </option>
</param>
<param>
<name>Decimation</name>
diff --git a/gr-filter/include/CMakeLists.txt b/gr-filter/include/CMakeLists.txt
index 3f6ac87..9f3b9b4 100644
--- a/gr-filter/include/CMakeLists.txt
+++ b/gr-filter/include/CMakeLists.txt
@@ -22,6 +22,7 @@
########################################################################
install(FILES
gr_filter_api.h
+ gr_filter_types.h
gr_filter_decim_fir.h
DESTINATION ${GR_INCLUDE_DIR}/gnuradio
COMPONENT "filter_devel"
diff --git a/gr-filter/include/gr_filter_decim_fir.h b/gr-filter/include/gr_filter_decim_fir.h
index fe087e7..2235dfc 100644
--- a/gr-filter/include/gr_filter_decim_fir.h
+++ b/gr-filter/include/gr_filter_decim_fir.h
@@ -24,14 +24,9 @@
#include <gr_filter_api.h>
#include <gr_sync_decimator.h>
+#include <gr_filter_types.h>
#include <complex>
-//TODO move this to common include
-enum filter_type{
- FILTER_FC32_IO_FC32_TAPS,
- FILTER_F32_IO_F32_TAPS,
-};
-
class GR_FILTER_API gr_filter_decim_fir : virtual public gr_sync_decimator{
public:
typedef boost::shared_ptr<gr_filter_decim_fir> sptr;
diff --git a/gr-filter/swig/filter_swig.i b/gr-filter/include/gr_filter_types.h
similarity index 64%
copy from gr-filter/swig/filter_swig.i
copy to gr-filter/include/gr_filter_types.h
index f8f0ddc..fa798c9 100644
--- a/gr-filter/swig/filter_swig.i
+++ b/gr-filter/include/gr_filter_types.h
@@ -19,17 +19,12 @@
* Boston, MA 02110-1301, USA.
*/
-#define GR_FILTER_API
+#ifndef INCLUDED_GR_FILTER_TYPES_H
+#define INCLUDED_GR_FILTER_TYPES_H
-%ignore gr_sync_block;
-%ignore gr_sync_decimator;
+enum filter_type{
+ FILTER_FC32_IO_FC32_TAPS,
+ FILTER_F32_IO_F32_TAPS,
+};
-////////////////////////////////////////////////////////////////////////
-// standard includes
-////////////////////////////////////////////////////////////////////////
-%include <gnuradio.i>
-
-////////////////////////////////////////////////////////////////////////
-// block includes
-////////////////////////////////////////////////////////////////////////
-%include <filter_fir.i>
+#endif /* INCLUDED_GR_FILTER_TYPES_H */
diff --git a/gr-filter/lib/gr_filter_decim_fir.cc b/gr-filter/lib/gr_filter_decim_fir.cc
index 9023a0d..ac2b362 100644
--- a/gr-filter/lib/gr_filter_decim_fir.cc
+++ b/gr-filter/lib/gr_filter_decim_fir.cc
@@ -30,7 +30,6 @@
#include <stdexcept>
#include <volk/volk.h>
#include <iostream>
-#include <boost/shared_array.hpp>
/***********************************************************************
* FIR filter FC32 implementation
@@ -47,8 +46,7 @@ public:
decim
)
{
- const int alignment_multiple = volk_get_alignment() / sizeof(type);
- set_output_multiple(std::max(16, alignment_multiple));
+ //NOP
}
int work(
@@ -57,46 +55,94 @@ public:
gr_vector_void_star &output_items
){
gruel::scoped_lock lock(_taps_mutex);
+ const size_t num_taps = this->history();
type *out = reinterpret_cast<type *>(output_items[0]);
const type *in = reinterpret_cast<const type *>(input_items[0]);
- const size_t num_bytes = this->history() * sizeof(type);
+ const type *taps = reinterpret_cast<const type *>(&_taps.front());
for (size_t i = 0; i < size_t(noutput_items); i++){
- volk_32fc_x2_conjugate_dot_prod_32fc_a(out, in, _taps, num_bytes);
- out++;
+ volk_32fc_x2_dot_prod_32fc_u(out+i, in, taps, num_taps);
in += this->decimation();
}
return noutput_items;
}
- void set_taps(const std::vector<std::complex<double> > &taps_){
+ void set_taps(const std::vector<std::complex<double> > &taps){
gruel::scoped_lock lock(_taps_mutex);
//copy the new taps in and update history
- std::vector<type> taps(taps_.size());
+ _taps.resize(taps.size());
for (size_t i = 0; i < taps.size(); i++){
- taps[i] = type(taps_[i]);
+ _taps[i] = type(taps[i]);
}
- if (taps.size() % 2 == 1){ //pad to 2x multiple because volk
- taps.push_back(0.0);
+ while (_taps.size() % (volk_get_alignment() / sizeof(type)) != 0){
+ _taps.push_back(0.0);
}
- std::reverse(taps.begin(), taps.end());
- this->set_history(taps.size());
-
- //copy taps in aligned memory
- //TODO the blob work can easily allocate managed aligned memory (so use that when its merged)
- const size_t num_bytes = this->history() * sizeof(type);
- const size_t align_pad = volk_get_alignment() - 1;
- _taps_mem = boost::shared_array<char>(new char[num_bytes + align_pad]);
- _taps = reinterpret_cast<type *>(size_t(_taps_mem.get() + align_pad) & ~align_pad);
- std::memcpy(_taps, &taps.front(), num_bytes);
+ std::reverse(_taps.begin(), _taps.end());
+ this->set_history(_taps.size());
}
private:
gruel::mutex _taps_mutex;
- boost::shared_array<char> _taps_mem;
- type *_taps;
+ std::vector<type> _taps;
+};
+
+/***********************************************************************
+ * FIR filter F32 implementation: decimating FIR with float input, float output and float taps
+ **********************************************************************/
+class gr_filter_decim_fir_f32 : public gr_filter_decim_fir{
+public:
+ typedef float type;
+ //one input and one output stream of sizeof(type) items; decim is handed to gr_sync_decimator
+ gr_filter_decim_fir_f32(const size_t decim):
+ gr_sync_decimator(
+ "FIR filter F32",
+ gr_make_io_signature (1, 1, sizeof(type)),
+ gr_make_io_signature (1, 1, sizeof(type)),
+ decim
+ )
+ {
+ //NOP
+ }
+ //writes noutput_items outputs, each the dot product of history() input items with the stored taps
+ int work(
+ int noutput_items,
+ gr_vector_const_void_star &input_items,
+ gr_vector_void_star &output_items
+ ){
+ gruel::scoped_lock lock(_taps_mutex);
+ const size_t num_taps = this->history();
+ type *out = reinterpret_cast<type *>(output_items[0]);
+ const type *in = reinterpret_cast<const type *>(input_items[0]);
+ const type *taps = reinterpret_cast<const type *>(&_taps.front());
+ //NOTE(review): &_taps.front() presumes set_taps() already ran with a non-empty tap list -- confirm
+ for (size_t i = 0; i < size_t(noutput_items); i++){
+ volk_32f_x2_dot_prod_32f_u(out+i, in, taps, num_taps);
+ in += this->decimation(); //next output starts decimation() input items later
+ }
+
+ return noutput_items;
+ }
+ //replaces the stored taps; only the real part of each complex tap is used
+ void set_taps(const std::vector<std::complex<double> > &taps){
+ gruel::scoped_lock lock(_taps_mutex);
+
+ //copy the new taps in and update history
+ _taps.resize(taps.size());
+ for (size_t i = 0; i < taps.size(); i++){
+ _taps[i] = type(taps[i].real());
+ }
+ while (_taps.size() % (volk_get_alignment() / sizeof(type)) != 0){ //zero-pad to a multiple of volk_get_alignment()/sizeof(type) taps
+ _taps.push_back(0.0);
+ }
+ std::reverse(_taps.begin(), _taps.end()); //the padding zeros end up at the front
+ this->set_history(_taps.size()); //history counts the padded taps
+ }
+
+private:
+ gruel::mutex _taps_mutex; //locked by both work() and set_taps()
+ std::vector<type> _taps; //padded, reversed taps as stored by set_taps()
+};
/***********************************************************************
@@ -105,7 +151,7 @@ private:
gr_filter_decim_fir::sptr gr_filter_decim_fir::make(filter_type type, const size_t decim){
switch(type){
case FILTER_FC32_IO_FC32_TAPS: return sptr(new gr_filter_decim_fir_fc32(decim));
- case FILTER_F32_IO_F32_TAPS://TODO
+ case FILTER_F32_IO_F32_TAPS: return sptr(new gr_filter_decim_fir_f32(decim));
default: throw std::invalid_argument("make FIR filter got unknown type");
}
}
diff --git a/gr-filter/swig/filter_swig.i b/gr-filter/swig/filter_swig.i
index f8f0ddc..18e5dbe 100644
--- a/gr-filter/swig/filter_swig.i
+++ b/gr-filter/swig/filter_swig.i
@@ -32,4 +32,5 @@
////////////////////////////////////////////////////////////////////////
// block includes
////////////////////////////////////////////////////////////////////////
+%include <gr_filter_types.h>
%include <filter_fir.i>
diff --git a/volk/include/volk/volk_32fc_x2_dot_prod_32fc_u.h b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_u.h
new file mode 100644
index 0000000..9a39c26
--- /dev/null
+++ b/volk/include/volk/volk_32fc_x2_dot_prod_32fc_u.h
@@ -0,0 +1,116 @@
+#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_u_H
+#define INCLUDED_volk_32fc_x2_dot_prod_32fc_u_H
+
+#include <volk/volk_common.h>
+#include <volk/volk_complex.h>
+#include <stdio.h>
+#include <string.h>
+
+
+#ifdef LV_HAVE_GENERIC
+
+
+static inline void volk_32fc_x2_dot_prod_32fc_u_generic(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+ // Generic complex dot product: *result = sum of input[i] * taps[i] over num_points points (taps are not conjugated).
+ float * res = (float*) result;
+ float * in = (float*) input;
+ float * tp = (float*) taps;
+ unsigned int n_2_ccomplex_blocks = num_points/2;
+ unsigned int isodd = num_points &1;
+ // The main loop consumes two complex points per pass; isodd is 1 when one point is left over.
+
+
+ float sum0[2] = {0,0};
+ float sum1[2] = {0,0};
+ unsigned int i = 0;
+ // sum0 accumulates the first point of each pair, sum1 the second, each as {real, imag}.
+
+ for(i = 0; i < n_2_ccomplex_blocks; ++i) {
+ // (a + jb)(c + jd) = (ac - bd) + j(ad + bc), on interleaved real/imag floats
+
+ sum0[0] += in[0] * tp[0] - in[1] * tp[1];
+ sum0[1] += in[0] * tp[1] + in[1] * tp[0];
+ sum1[0] += in[2] * tp[2] - in[3] * tp[3];
+ sum1[1] += in[2] * tp[3] + in[3] * tp[2];
+
+
+ in += 4;
+ tp += 4;
+
+ }
+
+ // Combine the two partial sums into the output.
+ res[0] = sum0[0] + sum1[0];
+ res[1] = sum0[1] + sum1[1];
+
+
+ // Runs at most once: adds the product of the final point when num_points is odd.
+ for(i = 0; i < isodd; ++i) {
+
+
+ *result += input[num_points - 1] * taps[num_points - 1];
+
+ }
+
+}
+
+#endif /*LV_HAVE_GENERIC*/
+
+#ifdef LV_HAVE_SSE3
+
+#include <pmmintrin.h>
+
+/* Unaligned SSE3 complex dot product: *result = sum of input[i] * taps[i] over num_points points (taps are not conjugated). */
+static inline void volk_32fc_x2_dot_prod_32fc_u_sse3(lv_32fc_t* result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
+  lv_32fc_t dotProduct;
+  memset(&dotProduct, 0x0, 2*sizeof(float));
+
+  unsigned int number = 0;
+  const unsigned int halfPoints = num_points/2; // the loop consumes two complex points per pass
+
+  __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
+
+  const lv_32fc_t* a = input;
+  const lv_32fc_t* b = taps;
+
+  dotProdVal = _mm_setzero_ps();
+
+  for(;number < halfPoints; number++){
+
+    x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
+    y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
+
+    yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
+    yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
+
+    tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
+
+    x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
+
+    tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
+
+    z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
+
+    dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication results together
+
+    a += 2;
+    b += 2;
+  }
+
+  __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
+
+  _mm_storeu_ps((float*)dotProductVector,dotProdVal); // Store the results back into the dot product vector
+
+  dotProduct += ( dotProductVector[0] + dotProductVector[1] ); // fold the two lane sums into one value
+
+  // Odd num_points: a and b now point at the final, unpaired point. The parity test must be % 2 (x % 1 is always 0).
+  if(num_points % 2 != 0) {
+    dotProduct += (*a) * (*b);
+  }
+
+  *result = dotProduct;
+}
+
+#endif /*LV_HAVE_SSE3*/
+
+#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_u_H*/
- [Commit-gnuradio] [gnuradio] 16/28: basic: performance tweak for sig source index mod, (continued)
- [Commit-gnuradio] [gnuradio] 16/28: basic: performance tweak for sig source index mod, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 04/28: basic: added other basic operators, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 01/28: basic: attempt at new component, partial adder implementation, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 27/28: basic: added dynamic delay block, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 13/28: Volk: whoops, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 17/28: core: squashed in-place block work, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 07/28: basic add/mult const, needs test, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 15/28: basic: added super fast signal source, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 26/28: basic: use set_output_alignment in the basic math blocks, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 19/28: core: make in-place buffering enabled per port, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 22/28: filter: added 32f decimating fir + tweaks,
git <=
- [Commit-gnuradio] [gnuradio] 18/28: basic: added stream selector block, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 05/28: basic: whoops, wrong operator, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 12/28: Volk: 32f_s32f_multiply_32f, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 03/28: basic: added int16 data types and filled in float32, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 08/28: basic: working add/mult const blocks, block magic2 for static make, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 20/28: core: enable inplace on some core math blocks, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 28/28: just a bunch of tweaks, git, 2016/08/14
- [Commit-gnuradio] [gnuradio] 21/28: filter: initial commit of gr filter, made fir decim block, git, 2016/08/14