commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 01/03: gr-dtv: Add generic version of DVB-T Viterbi decoder for non-x86 platforms.


From: git
Subject: [Commit-gnuradio] [gnuradio] 01/03: gr-dtv: Add generic version of DVB-T Viterbi decoder for non-x86 platforms.
Date: Wed, 14 Oct 2015 02:48:38 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit 0288cd0a351c74bfd604fbd8415a2a32135f6bb9
Author: Ron Economos <address@hidden>
Date:   Tue Oct 13 14:47:54 2015 -0700

    gr-dtv: Add generic version of DVB-T Viterbi decoder for non-x86 platforms.
---
 gr-dtv/CMakeLists.txt                        |   4 -
 gr-dtv/lib/CMakeLists.txt                    |   8 +-
 gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.cc | 270 +++++++++++++++++++++++++++
 gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.h  |  28 ++-
 4 files changed, 304 insertions(+), 6 deletions(-)

diff --git a/gr-dtv/CMakeLists.txt b/gr-dtv/CMakeLists.txt
index 3a0d116..fc7ab56 100644
--- a/gr-dtv/CMakeLists.txt
+++ b/gr-dtv/CMakeLists.txt
@@ -27,11 +27,7 @@ include(GrBoost)
 ########################################################################
 include(GrComponent)
 
-include (CheckCCompilerFlag)
-CHECK_C_COMPILER_FLAG ("-msse2" SSE2_SUPPORTED)
-
 GR_REGISTER_COMPONENT("gr-dtv" ENABLE_GR_DTV
-    SSE2_SUPPORTED
     Boost_FOUND
     ENABLE_GNURADIO_RUNTIME
     ENABLE_GR_ANALOG
diff --git a/gr-dtv/lib/CMakeLists.txt b/gr-dtv/lib/CMakeLists.txt
index f044469..868205c 100644
--- a/gr-dtv/lib/CMakeLists.txt
+++ b/gr-dtv/lib/CMakeLists.txt
@@ -127,7 +127,13 @@ list(APPEND dtv_libs
     ${VOLK_LIBRARIES}
 )
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
+include (CheckCCompilerFlag)
+CHECK_C_COMPILER_FLAG ("-msse2" SSE2_SUPPORTED)
+
+if(SSE2_SUPPORTED)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2")
+    ADD_DEFINITIONS(-DDTV_SSE2)
+endif(SSE2_SUPPORTED)
 
 #Add Windows DLL resource file if using MSVC
 if(MSVC)
diff --git a/gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.cc b/gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.cc
index 4f2362c..3b0f9fd 100644
--- a/gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.cc
+++ b/gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.cc
@@ -70,22 +70,40 @@ namespace gr {
       1, 0, 0, 1, 0, 1, 1, 0,
     };
 
+#ifdef DTV_SSE2
     __m128i dvbt_viterbi_decoder_impl::d_metric0[4] __attribute__ ((aligned(16)));
     __m128i dvbt_viterbi_decoder_impl::d_metric1[4] __attribute__ ((aligned(16)));
     __m128i dvbt_viterbi_decoder_impl::d_path0[4] __attribute__ ((aligned(16)));
     __m128i dvbt_viterbi_decoder_impl::d_path1[4] __attribute__ ((aligned(16)));
+#else
+    unsigned char dvbt_viterbi_decoder_impl::d_metric0_generic[64] __attribute__ ((aligned(16)));
+    unsigned char dvbt_viterbi_decoder_impl::d_metric1_generic[64] __attribute__ ((aligned(16)));
+    unsigned char dvbt_viterbi_decoder_impl::d_path0_generic[64] __attribute__ ((aligned(16)));
+    unsigned char dvbt_viterbi_decoder_impl::d_path1_generic[64] __attribute__ ((aligned(16)));
+#endif
 
+#ifdef DTV_SSE2
     branchtab27 dvbt_viterbi_decoder_impl::Branchtab27_sse2[2] __attribute__ ((aligned(16)));
+#else
+    branchtab27 dvbt_viterbi_decoder_impl::Branchtab27_generic[2] __attribute__ ((aligned(16)));
+#endif
 
     unsigned char dvbt_viterbi_decoder_impl::mmresult[64] __attribute__((aligned(16)));
     unsigned char dvbt_viterbi_decoder_impl::ppresult[TRACEBACK_MAX][64] __attribute__((aligned(16)));
 
+#ifdef DTV_SSE2
     void
     dvbt_viterbi_decoder_impl::dvbt_viterbi_chunks_init_sse2(__m128i *mm0, __m128i *pp0)
     {
+#else
+    void
+    dvbt_viterbi_decoder_impl::dvbt_viterbi_chunks_init_generic(unsigned char *mm0, unsigned char *pp0)
+    {
+#endif
       // Initialize starting metrics to prefer 0 state
       int i, j;
 
+#ifdef DTV_SSE2
       for (i = 0; i < 4; i++) {
         mm0[i] = _mm_setzero_si128();
         pp0[i] = _mm_setzero_si128();
@@ -96,6 +114,18 @@ namespace gr {
         Branchtab27_sse2[0].c[i] = (polys[0] < 0) ^ d_Partab[(2*i) & abs(polys[0])] ? 1 : 0;
         Branchtab27_sse2[1].c[i] = (polys[1] < 0) ^ d_Partab[(2*i) & abs(polys[1])] ? 1 : 0;
       }
+#else
+      for (i = 0; i < 64; i++) {
+        mm0[i] = 0;
+        pp0[i] = 0;
+      }
+
+      int polys[2] = { POLYA, POLYB };
+      for (i = 0; i < 32; i++) {
+        Branchtab27_generic[0].c[i] = (polys[0] < 0) ^ d_Partab[(2*i) & abs(polys[0])] ? 1 : 0;
+        Branchtab27_generic[1].c[i] = (polys[1] < 0) ^ d_Partab[(2*i) & abs(polys[1])] ? 1 : 0;
+      }
+#endif
 
       for (i = 0; i < 64; i++) {
         mmresult[i] = 0;
@@ -105,6 +135,7 @@ namespace gr {
       }
     }
 
+#ifdef DTV_SSE2
     void
     dvbt_viterbi_decoder_impl::dvbt_viterbi_butterfly2_sse2(unsigned char *symbols, __m128i *mm0, __m128i *mm1, __m128i *pp0, __m128i *pp1)
     {
@@ -213,24 +244,235 @@ namespace gr {
         path1[2*i+1] = _mm_unpackhi_epi8(tmp0, tmp1);
       }
     }
+#else
+    void
+    dvbt_viterbi_decoder_impl::dvbt_viterbi_butterfly2_generic(unsigned char *symbols, unsigned char *mm0, unsigned char *mm1, unsigned char *pp0, unsigned char *pp1)
+    {
+      int i, j, k;
 
+      unsigned char *metric0, *metric1;
+      unsigned char *path0, *path1;
+
+      metric0 = mm0;
+      path0 = pp0;
+      metric1 = mm1;
+      path1 = pp1;
+
+      // Operate on 4 symbols (2 bits) at a time
+
+      unsigned char m0[16], m1[16], m2[16], m3[16], decision0[16], decision1[16], survivor0[16], survivor1[16];
+      unsigned char metsv[16], metsvm[16];
+      unsigned char shift0[16], shift1[16];
+      unsigned char tmp0[16], tmp1[16];
+      unsigned char sym0v[16], sym1v[16];
+      unsigned short simd_epi16;
+
+      for (j = 0; j < 16; j++) {
+        sym0v[j] = symbols[0];
+        sym1v[j] = symbols[1];
+      }
+
+      for (i = 0; i < 2; i++) {
+        if (symbols[0] == 2) {
+          for (j = 0; j < 16; j++) {
+            metsvm[j] = Branchtab27_generic[1].c[(i*16) + j] ^ sym1v[j];
+            metsv[j] = 1 - metsvm[j];
+          }
+        }
+        else if (symbols[1] == 2) {
+          for (j = 0; j < 16; j++) {
+            metsvm[j] = Branchtab27_generic[0].c[(i*16) + j] ^ sym0v[j];
+            metsv[j] = 1 - metsvm[j];
+          }
+        }
+        else {
+          for (j = 0; j < 16; j++) {
+            metsvm[j] = (Branchtab27_generic[0].c[(i*16) + j] ^ sym0v[j]) + (Branchtab27_generic[1].c[(i*16) + j] ^ sym1v[j]);
+            metsv[j] = 2 - metsvm[j];
+          }
+        }
+
+        for (j = 0; j < 16; j++) {
+          m0[j] = metric0[(i*16) + j] + metsv[j];
+          m1[j] = metric0[((i+2)*16) + j] + metsvm[j];
+          m2[j] = metric0[(i*16) + j] + metsvm[j];
+          m3[j] = metric0[((i+2)*16) + j] + metsv[j];
+        }
+
+        for (j = 0; j < 16; j++) {
+          decision0[j] = ((m0[j] - m1[j]) > 0) ? 0xff : 0x0;
+          decision1[j] = ((m2[j] - m3[j]) > 0) ? 0xff : 0x0;
+          survivor0[j] = (decision0[j] & m0[j]) | ((~decision0[j]) & m1[j]);
+          survivor1[j] = (decision1[j] & m2[j]) | ((~decision1[j]) & m3[j]);
+        }
+
+        for (j = 0; j < 16; j += 2) {
+          simd_epi16 = path0[(i*16) + j];
+          simd_epi16 |= path0[(i*16) + (j+1)] << 8;
+          simd_epi16 <<= 1;
+          shift0[j] = simd_epi16;
+          shift0[j+1] = simd_epi16 >> 8;
+
+          simd_epi16 = path0[((i+2)*16) + j];
+          simd_epi16 |= path0[((i+2)*16) + (j+1)] << 8;
+          simd_epi16 <<= 1;
+          shift1[j] = simd_epi16;
+          shift1[j+1] = simd_epi16 >> 8;
+        }
+        for (j = 0; j < 16; j++) {
+          shift1[j] = shift1[j] + 1;
+        }
+
+        for (j = 0, k = 0; j < 16; j += 2, k++) {
+          metric1[(2*i*16) + j] = survivor0[k];
+          metric1[(2*i*16) + (j+1)] = survivor1[k];
+        }
+        for (j = 0; j < 16; j++) {
+          tmp0[j] = (decision0[j] & shift0[j]) | ((~decision0[j]) & shift1[j]);
+        }
+
+        for (j = 0, k = 8; j < 16; j += 2, k++) {
+          metric1[((2*i+1)*16) + j] = survivor0[k];
+          metric1[((2*i+1)*16) + (j+1)] = survivor1[k];
+        }
+        for (j = 0; j < 16; j++) {
+          tmp1[j] = (decision1[j] & shift0[j]) | ((~decision1[j]) & shift1[j]);
+        }
+
+        for (j = 0, k = 0; j < 16; j += 2, k++) {
+          path1[(2*i*16) + j] = tmp0[k];
+          path1[(2*i*16) + (j+1)] = tmp1[k];
+        }
+        for (j = 0, k = 8; j < 16; j += 2, k++) {
+          path1[((2*i+1)*16) + j] = tmp0[k];
+          path1[((2*i+1)*16) + (j+1)] = tmp1[k];
+        }
+      }
+
+      metric0 = mm1;
+      path0 = pp1;
+      metric1 = mm0;
+      path1 = pp0;
+
+      for (j = 0; j < 16; j++) {
+        sym0v[j] = symbols[2];
+        sym1v[j] = symbols[3];
+      }
+
+      for (i = 0; i < 2; i++) {
+        if (symbols[2] == 2) {
+          for (j = 0; j < 16; j++) {
+            metsvm[j] = Branchtab27_generic[1].c[(i*16) + j] ^ sym1v[j];
+            metsv[j] = 1 - metsvm[j];
+          }
+        }
+        else if (symbols[3] == 2) {
+          for (j = 0; j < 16; j++) {
+            metsvm[j] = Branchtab27_generic[0].c[(i*16) + j] ^ sym0v[j];
+            metsv[j] = 1 - metsvm[j];
+          }
+        }
+        else {
+          for (j = 0; j < 16; j++) {
+            metsvm[j] = (Branchtab27_generic[0].c[(i*16) + j] ^ sym0v[j]) + (Branchtab27_generic[1].c[(i*16) + j] ^ sym1v[j]);
+            metsv[j] = 2 - metsvm[j];
+          }
+        }
+
+        for (j = 0; j < 16; j++) {
+          m0[j] = metric0[(i*16) + j] + metsv[j];
+          m1[j] = metric0[((i+2)*16) + j] + metsvm[j];
+          m2[j] = metric0[(i*16) + j] + metsvm[j];
+          m3[j] = metric0[((i+2)*16) + j] + metsv[j];
+        }
+
+        for (j = 0; j < 16; j++) {
+          decision0[j] = ((m0[j] - m1[j]) > 0) ? 0xff : 0x0;
+          decision1[j] = ((m2[j] - m3[j]) > 0) ? 0xff : 0x0;
+          survivor0[j] = (decision0[j] & m0[j]) | ((~decision0[j]) & m1[j]);
+          survivor1[j] = (decision1[j] & m2[j]) | ((~decision1[j]) & m3[j]);
+        }
+
+        for (j = 0; j < 16; j += 2) {
+          simd_epi16 = path0[(i*16) + j];
+          simd_epi16 |= path0[(i*16) + (j+1)] << 8;
+          simd_epi16 <<= 1;
+          shift0[j] = simd_epi16;
+          shift0[j+1] = simd_epi16 >> 8;
+
+          simd_epi16 = path0[((i+2)*16) + j];
+          simd_epi16 |= path0[((i+2)*16) + (j+1)] << 8;
+          simd_epi16 <<= 1;
+          shift1[j] = simd_epi16;
+          shift1[j+1] = simd_epi16 >> 8;
+        }
+        for (j = 0; j < 16; j++) {
+          shift1[j] = shift1[j] + 1;
+        }
+
+        for (j = 0, k = 0; j < 16; j += 2, k++) {
+          metric1[(2*i*16) + j] = survivor0[k];
+          metric1[(2*i*16) + (j+1)] = survivor1[k];
+        }
+        for (j = 0; j < 16; j++) {
+          tmp0[j] = (decision0[j] & shift0[j]) | ((~decision0[j]) & shift1[j]);
+        }
+
+        for (j = 0, k = 8; j < 16; j += 2, k++) {
+          metric1[((2*i+1)*16) + j] = survivor0[k];
+          metric1[((2*i+1)*16) + (j+1)] = survivor1[k];
+        }
+        for (j = 0; j < 16; j++) {
+          tmp1[j] = (decision1[j] & shift0[j]) | ((~decision1[j]) & shift1[j]);
+        }
+
+        for (j = 0, k = 0; j < 16; j += 2, k++) {
+          path1[(2*i*16) + j] = tmp0[k];
+          path1[(2*i*16) + (j+1)] = tmp1[k];
+        }
+        for (j = 0, k = 8; j < 16; j += 2, k++) {
+          path1[((2*i+1)*16) + j] = tmp0[k];
+          path1[((2*i+1)*16) + (j+1)] = tmp1[k];
+        }
+      }
+    }
+#endif
+
+#ifdef DTV_SSE2
     unsigned char
     dvbt_viterbi_decoder_impl::dvbt_viterbi_get_output_sse2(__m128i *mm0, __m128i *pp0, int ntraceback, unsigned char *outbuf)
     {
+#else
+    unsigned char
+    dvbt_viterbi_decoder_impl::dvbt_viterbi_get_output_generic(unsigned char *mm0, unsigned char *pp0, int ntraceback, unsigned char *outbuf)
+    {
+#endif
       //  Find current best path
       int i;
       int bestmetric, minmetric;
       int beststate = 0;
       int pos = 0;
+#ifndef DTV_SSE2
+      int j;
+#endif
 
       // Implement a circular buffer with the last ntraceback paths
       store_pos = (store_pos + 1) % ntraceback;
 
+#ifdef DTV_SSE2
       // TODO - find another way to extract the value
       for (i = 0; i < 4; i++) {
         _mm_store_si128((__m128i *) &mmresult[i*16], mm0[i]);
         _mm_store_si128((__m128i *) &ppresult[store_pos][i*16], pp0[i]);
       }
+#else
+      for (i = 0; i < 4; i++) {
+        for (j = 0; j < 16; j++) {
+          mmresult[(i*16) + j] = mm0[(i*16) + j];
+          ppresult[store_pos][(i*16) + j] = pp0[(i*16) + j];
+        }
+      }
+#endif
 
       // Find out the best final state
       bestmetric = mmresult[beststate];
@@ -258,12 +500,21 @@ namespace gr {
       // Store output byte
       *outbuf = ppresult[pos][beststate];
 
+#ifdef DTV_SSE2
       // Zero out the path variable
       // and prevent metric overflow
       for (i = 0; i < 4; i++) {
         pp0[i] = _mm_setzero_si128();
         mm0[i] = _mm_sub_epi8(mm0[i], _mm_set1_epi8(minmetric));
       }
+#else
+      for (i = 0; i < 4; i++) {
+        for (j = 0; j < 16; j++) {
+          pp0[(i*16) + j] = 0;
+          mm0[(i*16) + j] = mm0[(i*16) + j] - minmetric;
+        }
+      }
+#endif
 
       return bestmetric;
     }
@@ -357,7 +608,12 @@ namespace gr {
       mettab[1][0] = 0;
       mettab[1][1] = 1;
 
+#ifdef DTV_SSE2
       dvbt_viterbi_chunks_init_sse2(d_metric0, d_path0);
+#else
+      dvbt_viterbi_chunks_init_generic(d_metric0_generic, d_path0_generic);
+#endif
+
     }
 
     /*
@@ -404,7 +660,12 @@ namespace gr {
 
         if (tags.size()) {
           d_init = 0;
+
+#ifdef DTV_SSE2
           dvbt_viterbi_chunks_init_sse2(d_metric0, d_path0);
+#else
+          dvbt_viterbi_chunks_init_generic(d_metric0_generic, d_path0_generic);
+#endif
 
           if (tags[0].offset - nread) {
             consume_each(tags[0].offset - nread);
@@ -443,12 +704,21 @@ namespace gr {
            */
           for (int in_count = 0; in_count < d_nbits; in_count++) {
             if ((in_count % 4) == 0) { // 0 or 3
+
+#ifdef DTV_SSE2
               dvbt_viterbi_butterfly2_sse2(&d_inbits[in_count & 0xfffffffc], d_metric0, d_metric1, d_path0, d_path1);
+#else
+              dvbt_viterbi_butterfly2_generic(&d_inbits[in_count & 0xfffffffc], d_metric0_generic, d_metric1_generic, d_path0_generic, d_path1_generic);
+#endif
 
               if ((in_count > 0) && (in_count % 16) == 8) { // 8 or 11
                 unsigned char c;
 
+#ifdef DTV_SSE2
                 dvbt_viterbi_get_output_sse2(d_metric0, d_path0, d_ntraceback, &c);
+#else
+                dvbt_viterbi_get_output_generic(d_metric0_generic, d_path0_generic, d_ntraceback, &c);
+#endif
 
                 if (d_init == 0) {
                   if (out_count >= d_ntraceback) {
diff --git a/gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.h b/gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.h
index 8870a79..22b040c 100644
--- a/gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.h
+++ b/gr-dtv/lib/dvbt/dvbt_viterbi_decoder_impl.h
@@ -23,7 +23,10 @@
 
 #include <gnuradio/dtv/dvbt_viterbi_decoder.h>
 #include "dvbt_configure.h"
+
+#ifdef DTV_SSE2
 #include <xmmintrin.h>
+#endif
 
 /* The two generator polynomials for the NASA Standard K=7 code.
  * Since these polynomials are known to be optimal for this constraint
@@ -35,10 +38,16 @@
 // Maximum number of traceback bytes
 #define TRACEBACK_MAX 24
 
+#ifdef DTV_SSE2
 union branchtab27 {
   unsigned char c[32];
   __m128i v[2];
 };
+#else
+struct branchtab27 {
+  unsigned char c[32];
+};
+#endif
 
 namespace gr {
   namespace dtv {
@@ -56,12 +65,23 @@ namespace gr {
       static const unsigned char d_puncture_7_8[];
       static const unsigned char d_Partab[];
 
+#ifdef DTV_SSE2
       static __m128i d_metric0[4];
       static __m128i d_metric1[4];
       static __m128i d_path0[4];
       static __m128i d_path1[4];
-
+#else
+      static unsigned char d_metric0_generic[64];
+      static unsigned char d_metric1_generic[64];
+      static unsigned char d_path0_generic[64];
+      static unsigned char d_path1_generic[64];
+#endif
+
+#ifdef DTV_SSE2
       static branchtab27 Branchtab27_sse2[2];
+#else
+      static branchtab27 Branchtab27_generic[2];
+#endif
 
       // Metrics for each state
       static unsigned char mmresult[64];
@@ -101,9 +121,15 @@ namespace gr {
       // Position in circular buffer where the current decoded byte is stored
       int store_pos;
 
+#ifdef DTV_SSE2
       void dvbt_viterbi_chunks_init_sse2(__m128i *mm0, __m128i *pp0);
       void dvbt_viterbi_butterfly2_sse2(unsigned char *symbols, __m128i m0[], __m128i m1[], __m128i p0[], __m128i p1[]);
       unsigned char dvbt_viterbi_get_output_sse2(__m128i *mm0, __m128i *pp0, int ntraceback, unsigned char *outbuf);
+#else
+      void dvbt_viterbi_chunks_init_generic(unsigned char *mm0, unsigned char *pp0);
+      void dvbt_viterbi_butterfly2_generic(unsigned char *symbols, unsigned char m0[], unsigned char m1[], unsigned char p0[], unsigned char p1[]);
+      unsigned char dvbt_viterbi_get_output_generic(unsigned char *mm0, unsigned char *pp0, int ntraceback, unsigned char *outbuf);
+#endif
 
      public:
       dvbt_viterbi_decoder_impl(dvb_constellation_t constellation, \



reply via email to

[Prev in Thread] Current Thread [Next in Thread]