commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] [gnuradio] 01/05: remove volk to prepare for submodule


From: git
Subject: [Commit-gnuradio] [gnuradio] 01/05: remove volk to prepare for submodule
Date: Mon, 30 Mar 2015 05:25:09 +0000 (UTC)

This is an automated email from the git hooks/post-receive script.

jcorgan pushed a commit to branch master
in repository gnuradio.

commit 33b086993ece0d9b5941140d8367af37ce737848
Author: Johnathan Corgan <address@hidden>
Date:   Sun Mar 29 13:05:43 2015 -0700

    remove volk to prepare for submodule
    
    Conflicts:
        volk/cmake/GrPython.cmake
---
 volk/CMakeLists.txt                                |  183 ----
 volk/apps/CMakeLists.txt                           |   61 --
 volk/apps/volk-config-info.cc                      |   96 --
 volk/apps/volk_profile.cc                          |  280 -----
 volk/cmake/CMakeParseArgumentsCopy.cmake           |  138 ---
 volk/cmake/FindORC.cmake                           |   36 -
 volk/cmake/GrPython.cmake                          |  234 ----
 volk/cmake/VolkBoost.cmake                         |   98 --
 volk/cmake/VolkConfig.cmake                        |   26 -
 volk/cmake/msvc/config.h                           |   58 -
 volk/cmake/msvc/inttypes.h                         |  301 ------
 volk/cmake/msvc/stdbool.h                          |   45 -
 volk/cmake/msvc/stdint.h                           |  251 -----
 volk/gen/archs.xml                                 |  204 ----
 volk/gen/machines.xml                              |   55 -
 volk/gen/volk_arch_defs.py                         |   85 --
 volk/gen/volk_compile_utils.py                     |   58 -
 volk/gen/volk_kernel_defs.py                       |  209 ----
 volk/gen/volk_machine_defs.py                      |   74 --
 volk/gen/volk_tmpl_utils.py                        |   74 --
 volk/include/volk/constants.h                      |   39 -
 volk/include/volk/volk_common.h                    |   96 --
 volk/include/volk/volk_complex.h                   |   86 --
 volk/include/volk/volk_malloc.h                    |   66 --
 volk/include/volk/volk_prefs.h                     |   28 -
 volk/kernels/README.txt                            |   67 --
 .../asm/neon/volk_16i_max_star_horizontal_16i.s    |   52 -
 .../asm/neon/volk_32f_s32f_multiply_32f_neonasm.s  |   52 -
 .../volk/asm/neon/volk_32f_x2_add_32f_a_neonasm.s  |   54 -
 .../asm/neon/volk_32f_x2_add_32f_a_neonpipeline.s  |   68 --
 .../asm/neon/volk_32f_x2_dot_prod_32f_neonasm.s    |   58 -
 .../neon/volk_32f_x2_dot_prod_32f_neonasm_opts.s   |  116 --
 .../neon/volk_32fc_32f_dot_prod_32fc_a_neonasm.s   |   79 --
 ...volk_32fc_32f_dot_prod_32fc_a_neonasmpipeline.s |   86 --
 .../volk_32fc_32f_dot_prod_32fc_a_neonasmvmla.s    |   74 --
 .../neon/volk_32fc_32f_dot_prod_32fc_unrollasm.s   |  146 ---
 .../asm/neon/volk_32fc_x2_dot_prod_32fc_neonasm.s  |   98 --
 .../volk_32fc_x2_dot_prod_32fc_neonasm_opttests.s  |   96 --
 .../asm/neon/volk_32fc_x2_multiply_32fc_neonasm.s  |   45 -
 ...olk_arm_32fc_32f_dot_prod_32fc_a_neonpipeline.s |   92 --
 volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h    |  274 -----
 volk/kernels/volk/volk_16i_branch_4_state_8.h      |  216 ----
 volk/kernels/volk/volk_16i_convert_8i.h            |  199 ----
 volk/kernels/volk/volk_16i_max_star_16i.h          |  170 ---
 .../volk/volk_16i_max_star_horizontal_16i.h        |  190 ----
 .../kernels/volk/volk_16i_permute_and_scalar_add.h |  164 ---
 volk/kernels/volk/volk_16i_s32f_convert_32f.h      |  423 --------
 volk/kernels/volk/volk_16i_x4_quad_max_star_16i.h  |  274 -----
 volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h    |  205 ----
 volk/kernels/volk/volk_16ic_deinterleave_16i_x2.h  |  180 ----
 .../kernels/volk/volk_16ic_deinterleave_real_16i.h |  142 ---
 volk/kernels/volk/volk_16ic_deinterleave_real_8i.h |  116 --
 volk/kernels/volk/volk_16ic_magnitude_16i.h        |  213 ----
 .../volk/volk_16ic_s32f_deinterleave_32f_x2.h      |  131 ---
 .../volk/volk_16ic_s32f_deinterleave_real_32f.h    |  148 ---
 volk/kernels/volk/volk_16ic_s32f_magnitude_32f.h   |  202 ----
 volk/kernels/volk/volk_16u_byteswap.h              |  244 -----
 volk/kernels/volk/volk_16u_byteswappuppet_16u.h    |   55 -
 volk/kernels/volk/volk_32f_accumulator_s32f.h      |   90 --
 volk/kernels/volk/volk_32f_acos_32f.h              |  194 ----
 volk/kernels/volk/volk_32f_asin_32f.h              |  189 ----
 volk/kernels/volk/volk_32f_atan_32f.h              |  183 ----
 volk/kernels/volk/volk_32f_binary_slicer_32i.h     |  259 -----
 volk/kernels/volk/volk_32f_binary_slicer_8i.h      |  289 -----
 volk/kernels/volk/volk_32f_convert_64f.h           |  234 ----
 volk/kernels/volk/volk_32f_cos_32f.h               |  211 ----
 volk/kernels/volk/volk_32f_expfast_32f.h           |  216 ----
 volk/kernels/volk/volk_32f_index_max_16u.h         |  171 ---
 volk/kernels/volk/volk_32f_invsqrt_32f.h           |  196 ----
 volk/kernels/volk/volk_32f_log2_32f.h              |  332 ------
 .../kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h |  142 ---
 .../volk_32f_s32f_calc_spectral_noise_floor_32f.h  |  190 ----
 volk/kernels/volk/volk_32f_s32f_convert_16i.h      |  438 --------
 volk/kernels/volk/volk_32f_s32f_convert_32i.h      |  353 -------
 volk/kernels/volk/volk_32f_s32f_convert_8i.h       |  334 ------
 volk/kernels/volk/volk_32f_s32f_multiply_32f.h     |  270 -----
 volk/kernels/volk/volk_32f_s32f_normalize.h        |  103 --
 volk/kernels/volk/volk_32f_s32f_power_32f.h        |  166 ---
 volk/kernels/volk/volk_32f_s32f_stddev_32f.h       |  167 ---
 volk/kernels/volk/volk_32f_sin_32f.h               |  211 ----
 volk/kernels/volk/volk_32f_sqrt_32f.h              |  130 ---
 .../kernels/volk/volk_32f_stddev_and_mean_32f_x2.h |  192 ----
 volk/kernels/volk/volk_32f_tan_32f.h               |  220 ----
 volk/kernels/volk/volk_32f_tanh_32f.h              |  318 ------
 volk/kernels/volk/volk_32f_x2_add_32f.h            |  212 ----
 volk/kernels/volk/volk_32f_x2_divide_32f.h         |  104 --
 volk/kernels/volk/volk_32f_x2_dot_prod_16i.h       |  187 ----
 volk/kernels/volk/volk_32f_x2_dot_prod_32f.h       |  692 ------------
 volk/kernels/volk/volk_32f_x2_interleave_32fc.h    |  130 ---
 volk/kernels/volk/volk_32f_x2_max_32f.h            |  143 ---
 volk/kernels/volk/volk_32f_x2_min_32f.h            |  145 ---
 volk/kernels/volk/volk_32f_x2_multiply_32f.h       |  277 -----
 volk/kernels/volk/volk_32f_x2_pow_32f.h            |  298 ------
 .../volk/volk_32f_x2_s32f_interleave_16ic.h        |  178 ----
 volk/kernels/volk/volk_32f_x2_subtract_32f.h       |  139 ---
 volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h    |  452 --------
 volk/kernels/volk/volk_32fc_32f_dot_prod_32fc.h    |  539 ----------
 volk/kernels/volk/volk_32fc_32f_multiply_32fc.h    |  213 ----
 volk/kernels/volk/volk_32fc_conjugate_32fc.h       |  261 -----
 volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h  |  179 ----
 volk/kernels/volk/volk_32fc_deinterleave_64f_x2.h  |  280 -----
 .../kernels/volk/volk_32fc_deinterleave_imag_32f.h |  161 ---
 .../kernels/volk/volk_32fc_deinterleave_real_32f.h |  116 --
 .../kernels/volk/volk_32fc_deinterleave_real_64f.h |   88 --
 volk/kernels/volk/volk_32fc_index_max_16u.h        |  240 -----
 volk/kernels/volk/volk_32fc_magnitude_32f.h        |  472 ---------
 .../kernels/volk/volk_32fc_magnitude_squared_32f.h |  380 -------
 volk/kernels/volk/volk_32fc_s32f_atan2_32f.h       |  180 ----
 .../volk/volk_32fc_s32f_deinterleave_real_16i.h    |  103 --
 volk/kernels/volk/volk_32fc_s32f_magnitude_16i.h   |  181 ----
 volk/kernels/volk/volk_32fc_s32f_power_32fc.h      |  133 ---
 .../volk/volk_32fc_s32f_power_spectrum_32f.h       |  148 ---
 .../volk_32fc_s32f_x2_power_spectral_density_32f.h |  248 -----
 volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h  |  325 ------
 .../volk/volk_32fc_s32fc_rotatorpuppet_32fc.h      |  103 --
 .../kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h |  484 ---------
 .../volk/volk_32fc_x2_conjugate_dot_prod_32fc.h    |  569 ----------
 volk/kernels/volk/volk_32fc_x2_dot_prod_32fc.h     | 1116 --------------------
 volk/kernels/volk/volk_32fc_x2_multiply_32fc.h     |  401 -------
 .../volk/volk_32fc_x2_multiply_conjugate_32fc.h    |  343 ------
 ...volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h |  152 ---
 volk/kernels/volk/volk_32fc_x2_square_dist_32f.h   |  168 ---
 volk/kernels/volk/volk_32i_s32f_convert_32f.h      |  170 ---
 volk/kernels/volk/volk_32i_x2_and_32i.h            |  137 ---
 volk/kernels/volk/volk_32i_x2_or_32i.h             |  137 ---
 volk/kernels/volk/volk_32u_byteswap.h              |  227 ----
 volk/kernels/volk/volk_32u_byteswappuppet_32u.h    |   45 -
 volk/kernels/volk/volk_32u_popcnt.h                |   58 -
 volk/kernels/volk/volk_32u_popcntpuppet_32u.h      |   47 -
 volk/kernels/volk/volk_64f_convert_32f.h           |  156 ---
 volk/kernels/volk/volk_64f_x2_max_64f.h            |   93 --
 volk/kernels/volk/volk_64f_x2_min_64f.h            |   93 --
 volk/kernels/volk/volk_64u_byteswap.h              |  253 -----
 volk/kernels/volk/volk_64u_byteswappuppet_64u.h    |   46 -
 volk/kernels/volk/volk_64u_popcnt.h                |   94 --
 volk/kernels/volk/volk_64u_popcntpuppet_64u.h      |   58 -
 volk/kernels/volk/volk_8i_convert_16i.h            |  217 ----
 volk/kernels/volk/volk_8i_s32f_convert_32f.h       |  222 ----
 volk/kernels/volk/volk_8ic_deinterleave_16i_x2.h   |  166 ---
 volk/kernels/volk/volk_8ic_deinterleave_real_16i.h |  136 ---
 volk/kernels/volk/volk_8ic_deinterleave_real_8i.h  |  175 ---
 .../volk/volk_8ic_s32f_deinterleave_32f_x2.h       |  187 ----
 .../volk/volk_8ic_s32f_deinterleave_real_32f.h     |  156 ---
 .../volk/volk_8ic_x2_multiply_conjugate_16ic.h     |  123 ---
 .../volk_8ic_x2_s32f_multiply_conjugate_32fc.h     |  144 ---
 volk/kernels/volk/volk_8u_conv_k7_r2puppet_8u.h    |  264 -----
 volk/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h       |  422 --------
 volk/lib/CMakeLists.txt                            |  566 ----------
 volk/lib/constants.c.in                            |   63 --
 volk/lib/gcc_x86_cpuid.h                           |  188 ----
 volk/lib/qa_16s_add_quad_aligned16.cc              |   89 --
 volk/lib/qa_16s_add_quad_aligned16.h               |   18 -
 volk/lib/qa_16s_branch_4_state_8_aligned16.cc      |  106 --
 volk/lib/qa_16s_branch_4_state_8_aligned16.h       |   18 -
 .../lib/qa_16s_permute_and_scalar_add_aligned16.cc |   78 --
 volk/lib/qa_16s_permute_and_scalar_add_aligned16.h |   18 -
 volk/lib/qa_16s_quad_max_star_aligned16.cc         |   60 --
 volk/lib/qa_16s_quad_max_star_aligned16.h          |   18 -
 volk/lib/qa_32f_fm_detect_aligned16.cc             |   61 --
 volk/lib/qa_32f_fm_detect_aligned16.h              |   18 -
 volk/lib/qa_32f_index_max_aligned16.cc             |  103 --
 volk/lib/qa_32f_index_max_aligned16.h              |   18 -
 volk/lib/qa_32fc_index_max_aligned16.cc            |   89 --
 volk/lib/qa_32fc_index_max_aligned16.h             |   18 -
 ...qa_32fc_power_spectral_density_32f_aligned16.cc |   64 --
 .../qa_32fc_power_spectral_density_32f_aligned16.h |   18 -
 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc   |  138 ---
 volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h    |   18 -
 volk/lib/qa_32u_popcnt_aligned16.cc                |   62 --
 volk/lib/qa_32u_popcnt_aligned16.h                 |   18 -
 volk/lib/qa_64u_popcnt_aligned16.cc                |   62 --
 volk/lib/qa_64u_popcnt_aligned16.h                 |   18 -
 volk/lib/qa_utils.cc                               |  584 ----------
 volk/lib/qa_utils.h                                |   80 --
 volk/lib/testqa.cc                                 |  126 ---
 volk/lib/volk_malloc.c                             |  142 ---
 volk/lib/volk_prefs.c                              |   50 -
 volk/lib/volk_rank_archs.c                         |  119 ---
 volk/lib/volk_rank_archs.h                         |   50 -
 .../volk_16ic_deinterleave_16i_x2_a_orc_impl.orc   |    5 -
 .../volk_16ic_deinterleave_real_8i_a_orc_impl.orc  |    6 -
 volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc    |   23 -
 ...lk_16ic_s32f_deinterleave_32f_x2_a_orc_impl.orc |   12 -
 .../volk_16sc_magnitude_32f_aligned16_orc_impl.orc |   25 -
 volk/orc/volk_16u_byteswap_a_orc_impl.orc          |    3 -
 volk/orc/volk_32f_s32f_multiply_32f_a_orc_impl.orc |    5 -
 volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc    |    5 -
 volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc          |    4 -
 volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc        |    5 -
 volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc     |    5 -
 volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc   |    6 -
 volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc        |    5 -
 volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc        |    5 -
 volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc   |    5 -
 volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc   |    5 -
 .../orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc |    7 -
 volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc    |   13 -
 .../volk_32fc_s32f_magnitude_16i_a_orc_impl.orc    |   23 -
 .../volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc   |   18 -
 volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc |   18 -
 volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc        |    5 -
 volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc         |    5 -
 volk/orc/volk_8i_convert_16i_a_orc_impl.orc        |    6 -
 volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc   |   11 -
 volk/python/volk_modtool/CMakeLists.txt            |   39 -
 volk/python/volk_modtool/README                    |  114 --
 volk/python/volk_modtool/__init__.py               |   24 -
 volk/python/volk_modtool/cfg.py                    |  104 --
 volk/python/volk_modtool/volk_modtool              |  128 ---
 volk/python/volk_modtool/volk_modtool_generate.py  |  330 ------
 volk/tmpl/volk.tmpl.c                              |  212 ----
 volk/tmpl/volk.tmpl.h                              |   94 --
 volk/tmpl/volk_config_fixed.tmpl.h                 |   29 -
 volk/tmpl/volk_cpu.tmpl.c                          |  191 ----
 volk/tmpl/volk_cpu.tmpl.h                          |   42 -
 volk/tmpl/volk_machine_xxx.tmpl.c                  |   79 --
 volk/tmpl/volk_machines.tmpl.c                     |   34 -
 volk/tmpl/volk_machines.tmpl.h                     |   55 -
 volk/tmpl/volk_typedefs.tmpl.h                     |   32 -
 volk/volk.pc.in                                    |   14 -
 220 files changed, 32603 deletions(-)

diff --git a/volk/CMakeLists.txt b/volk/CMakeLists.txt
deleted file mode 100644
index 2bf70bb..0000000
--- a/volk/CMakeLists.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-#
-# Copyright 2011 Free Software Foundation, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-########################################################################
-# Project setup
-########################################################################
-cmake_minimum_required(VERSION 2.6)
-if(NOT DEFINED CMAKE_BUILD_TYPE)
-    set(CMAKE_BUILD_TYPE Release)
-endif()
-set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "Choose build type: None 
Debug Release RelWithDebInfo MinSizeRel")
-project(volk)
-enable_language(CXX)
-enable_language(C)
-enable_testing()
-set(VERSION 0.1)
-set(LIBVER 0.0.0)
-
-set(CMAKE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) #allows this to be a 
sub-project
-set(CMAKE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) #allows this to be a 
sub-project
-set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) #location for custom 
"Modules"
-
-########################################################################
-# Environment setup
-########################################################################
-IF(NOT DEFINED BOOST_ROOT)
-    SET(BOOST_ROOT ${CMAKE_INSTALL_PREFIX})
-ENDIF()
-
-IF(NOT DEFINED CROSSCOMPILE_MULTILIB)
-    SET(CROSSCOMPILE_MULTILIB "")
-ENDIF()
-SET(CROSSCOMPILE_MULTILIB ${CROSSCOMPILE_MULTILIB} CACHE STRING "Define 
\"true\" if you have and want to use multiple C development libs installed for 
cross compile")
-
-
-########################################################################
-# Dependencies setup
-########################################################################
-include(GrPython) #sets PYTHON_EXECUTABLE and PYTHON_DASH_B
-VOLK_PYTHON_CHECK_MODULE("python >= 2.5" sys "sys.version.split()[0] >= '2.5'" 
PYTHON_MIN_VER_FOUND)
-VOLK_PYTHON_CHECK_MODULE("Cheetah >= 2.0.0" Cheetah "Cheetah.Version >= 
'2.0.0'" CHEETAH_FOUND)
-
-if(NOT PYTHON_MIN_VER_FOUND)
-    message(FATAL_ERROR "Python 2.5 or greater required to build VOLK")
-endif()
-
-if(NOT CHEETAH_FOUND)
-    message(FATAL_ERROR "Cheetah templates required to build VOLK")
-endif()
-
-if(MSVC)
-    if (NOT DEFINED BOOST_ALL_DYN_LINK)
-        set(BOOST_ALL_DYN_LINK TRUE)
-    endif()
-    set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable 
dynamic linking")
-    if(BOOST_ALL_DYN_LINK)
-        add_definitions(-DBOOST_ALL_DYN_LINK) #setup boost auto-linking in msvc
-    else(BOOST_ALL_DYN_LINK)
-        unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
-    endif(BOOST_ALL_DYN_LINK)
-endif(MSVC)
-include(VolkBoost)
-
-if(NOT Boost_FOUND)
-    message(FATAL_ERROR "VOLK Requires boost to build")
-endif()
-
-option(ENABLE_ORC "Enable Orc" True)
-if(ENABLE_ORC)
-  find_package(ORC)
-else(ENABLE_ORC)
-  message(STATUS "Disabling use of ORC")
-endif(ENABLE_ORC)
-
-########################################################################
-# Setup the package config file
-########################################################################
-#set variables found in the pc.in file
-set(prefix ${CMAKE_INSTALL_PREFIX})
-set(exec_prefix "\${prefix}")
-set(libdir "\${exec_prefix}/lib${LIB_SUFFIX}")
-set(includedir "\${prefix}/include")
-
-configure_file(
-    ${CMAKE_CURRENT_SOURCE_DIR}/volk.pc.in
-    ${CMAKE_CURRENT_BINARY_DIR}/volk.pc
address@hidden)
-
-install(
-    FILES ${CMAKE_CURRENT_BINARY_DIR}/volk.pc
-    DESTINATION lib${LIB_SUFFIX}/pkgconfig
-    COMPONENT "volk_devel"
-)
-
-########################################################################
-# Install all headers in the include directories
-########################################################################
-set(VOLK_RUNTIME_DIR   bin)
-set(VOLK_LIBRARY_DIR   lib${LIB_SUFFIX})
-set(VOLK_INCLUDE_DIR   include)
-
-install(
-    DIRECTORY ${CMAKE_SOURCE_DIR}/kernels/volk
-    DESTINATION include COMPONENT "volk_devel"
-    FILES_MATCHING PATTERN "*.h"
-)
-
-install(FILES
-    ${CMAKE_SOURCE_DIR}/include/volk/volk_prefs.h
-    ${CMAKE_SOURCE_DIR}/include/volk/volk_complex.h
-    ${CMAKE_SOURCE_DIR}/include/volk/volk_common.h
-    ${CMAKE_BINARY_DIR}/include/volk/volk.h
-    ${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h
-    ${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h
-    ${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h
-    ${CMAKE_SOURCE_DIR}/include/volk/volk_malloc.h
-    DESTINATION include/volk
-    COMPONENT "volk_devel"
-)
-
-########################################################################
-# Install cmake search routine for external use
-########################################################################
-
-if(NOT CMAKE_MODULES_DIR)
-    set(CMAKE_MODULES_DIR lib${LIB_SUFFIX}/cmake)
-endif(NOT CMAKE_MODULES_DIR)
-
-install(
-    FILES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/VolkConfig.cmake
-    DESTINATION ${CMAKE_MODULES_DIR}/volk
-    COMPONENT "volk_devel"
-)
-
-########################################################################
-# On Apple only, set install name and use rpath correctly, if not already set
-########################################################################
-if(APPLE)
-    if(NOT CMAKE_INSTALL_NAME_DIR)
-        set(CMAKE_INSTALL_NAME_DIR
-            ${CMAKE_INSTALL_PREFIX}/${GR_LIBRARY_DIR} CACHE
-            PATH "Library Install Name Destination Directory" FORCE)
-    endif(NOT CMAKE_INSTALL_NAME_DIR)
-    if(NOT CMAKE_INSTALL_RPATH)
-        set(CMAKE_INSTALL_RPATH
-            ${CMAKE_INSTALL_PREFIX}/${GR_LIBRARY_DIR} CACHE
-            PATH "Library Install RPath" FORCE)
-    endif(NOT CMAKE_INSTALL_RPATH)
-    if(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
-        set(CMAKE_BUILD_WITH_INSTALL_RPATH ON CACHE
-            BOOL "Do Build Using Library Install RPath" FORCE)
-    endif(NOT CMAKE_BUILD_WITH_INSTALL_RPATH)
-endif(APPLE)
-
-########################################################################
-# Setup the library
-########################################################################
-add_subdirectory(lib)
-
-########################################################################
-# And the utility apps
-########################################################################
-add_subdirectory(apps)
-add_subdirectory(python/volk_modtool)
-
-########################################################################
-# Print summary
-########################################################################
-message(STATUS "Using install prefix: ${CMAKE_INSTALL_PREFIX}")
diff --git a/volk/apps/CMakeLists.txt b/volk/apps/CMakeLists.txt
deleted file mode 100644
index e3d25f0..0000000
--- a/volk/apps/CMakeLists.txt
+++ /dev/null
@@ -1,61 +0,0 @@
-#
-# Copyright 2011-2013 Free Software Foundation, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-########################################################################
-# Setup profiler
-########################################################################
-if(Boost_FOUND)
-
-if(MSVC)
-    include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
-endif(MSVC)
-
-include_directories(
-    ${CMAKE_CURRENT_SOURCE_DIR}
-    ${CMAKE_CURRENT_BINARY_DIR}
-    ${CMAKE_SOURCE_DIR}/include
-    ${CMAKE_BINARY_DIR}/include
-    ${CMAKE_SOURCE_DIR}/lib
-    ${CMAKE_BINARY_DIR}/lib
-    ${Boost_INCLUDE_DIRS}
-)
-
-# MAKE volk_profile
-add_executable(volk_profile
-    ${CMAKE_CURRENT_SOURCE_DIR}/volk_profile.cc
-    ${CMAKE_SOURCE_DIR}/lib/qa_utils.cc
-)
-
-target_link_libraries(volk_profile volk ${Boost_LIBRARIES})
-
-install(
-    TARGETS volk_profile
-    DESTINATION bin
-    COMPONENT "volk"
-)
-
-# MAKE volk-config-info
-add_executable(volk-config-info volk-config-info.cc)
-target_link_libraries(volk-config-info volk ${Boost_LIBRARIES})
-
-install(
-    TARGETS volk-config-info
-    DESTINATION bin
-    COMPONENT "volk"
-)
-
-endif(Boost_FOUND)
diff --git a/volk/apps/volk-config-info.cc b/volk/apps/volk-config-info.cc
deleted file mode 100644
index 11011d2..0000000
--- a/volk/apps/volk-config-info.cc
+++ /dev/null
@@ -1,96 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2013 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#if HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <volk/constants.h>
-#include "volk/volk.h"
-#include <boost/program_options.hpp>
-#include <iostream>
-
-namespace po = boost::program_options;
-
-int
-main(int argc, char **argv)
-{
-  po::options_description desc("Program options: volk-config-info [options]");
-  po::variables_map vm;
-
-  desc.add_options()
-    ("help,h", "print help message")
-    ("prefix", "print VOLK installation prefix")
-    ("builddate", "print VOLK build date (RFC2822 format)")
-    ("cc", "print VOLK C compiler version")
-    ("cflags", "print VOLK CFLAGS")
-    ("all-machines", "print VOLK machines built into library")
-    ("avail-machines", "print VOLK machines the current platform can use")
-    ("machine", "print the VOLK machine that will be used")
-    ("version,v", "print VOLK version")
-    ;
-
-  try {
-    po::store(po::parse_command_line(argc, argv, desc), vm);
-    po::notify(vm);
-  }
-  catch (po::error& error){
-    std::cerr << "Error: " << error.what() << std::endl << std::endl;
-    std::cerr << desc << std::endl;
-    return 1;
-  }
-
-  if(vm.size() == 0 || vm.count("help")) {
-    std::cout << desc << std::endl;
-    return 1;
-  }
-
-  if(vm.count("prefix"))
-    std::cout << volk_prefix() << std::endl;
-
-  if(vm.count("builddate"))
-    std::cout << volk_build_date() << std::endl;
-
-  if(vm.count("version"))
-    std::cout << volk_version() << std::endl;
-
-  if(vm.count("cc"))
-    std::cout << volk_c_compiler() << std::endl;
-
-  if(vm.count("cflags"))
-    std::cout << volk_compiler_flags() << std::endl;
-
-  // stick an extra ';' to make output of this and avail-machines the
-  // same structure for easier parsing
-  if(vm.count("all-machines"))
-    std::cout << volk_available_machines() << ";" << std::endl;
-
-  if(vm.count("avail-machines")) {
-    volk_list_machines();
-  }
-
-  if(vm.count("machine")) {
-    std::cout << volk_get_machine() << std::endl;
-  }
-
-  return 0;
-}
diff --git a/volk/apps/volk_profile.cc b/volk/apps/volk_profile.cc
deleted file mode 100644
index 9bc1842..0000000
--- a/volk/apps/volk_profile.cc
+++ /dev/null
@@ -1,280 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2012-2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "qa_utils.h"
-
-#include <volk/volk.h>
-#include <volk/volk_prefs.h>
-
-#include <ciso646>
-#include <vector>
-#include <boost/foreach.hpp>
-#include <boost/filesystem.hpp>
-#include <boost/program_options.hpp>
-#include <iostream>
-#include <fstream>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-namespace fs = boost::filesystem;
-
-void write_json(std::ofstream &json_file, std::vector<volk_test_results_t> 
results) {
-    json_file << "{" << std::endl;
-    json_file << " \"volk_tests\": [" << std::endl;
-    size_t len = results.size();
-    size_t i = 0;
-    BOOST_FOREACH(volk_test_results_t &result, results) {
-        json_file << "  {" << std::endl;
-        json_file << "   \"name\": \"" << result.name << "\"," << std::endl;
-        json_file << "   \"vlen\": " << result.vlen << "," << std::endl;
-        json_file << "   \"iter\": " << result.iter << "," << std::endl;
-        json_file << "   \"best_arch_a\": \"" << result.best_arch_a
-            << "\"," << std::endl;
-        json_file << "   \"best_arch_u\": \"" << result.best_arch_u
-            << "\"," << std::endl;
-        json_file << "   \"results\": {" << std::endl;
-        size_t results_len = result.results.size();
-        size_t ri = 0;
-        typedef std::pair<std::string, volk_test_time_t> tpair;
-        BOOST_FOREACH(tpair pair, result.results) {
-            volk_test_time_t time = pair.second;
-            json_file << "    \"" << time.name << "\": {" << std::endl;
-            json_file << "     \"name\": \"" << time.name << "\"," << 
std::endl;
-            json_file << "     \"time\": " << time.time << "," << std::endl;
-            json_file << "     \"units\": \"" << time.units << "\"" << 
std::endl;
-            json_file << "    }" ;
-            if(ri+1 != results_len) {
-                json_file << ",";
-            }
-            json_file << std::endl;
-            ri++;
-        }
-        json_file << "   }" << std::endl;
-        json_file << "  }";
-        if(i+1 != len) {
-            json_file << ",";
-        }
-        json_file << std::endl;
-        i++;
-    }
-    json_file << " ]" << std::endl;
-    json_file << "}" << std::endl;
-}
-
-int main(int argc, char *argv[]) {
-    // Adding program options
-    boost::program_options::options_description desc("Options");
-    desc.add_options()
-      ("help,h", "Print help messages")
-      ("benchmark,b",
-            boost::program_options::value<bool>()->default_value( false )
-                                                ->implicit_value( true ),
-            "Run all kernels (benchmark mode)")
-      ("tests-regex,R",
-            boost::program_options::value<std::string>(),
-            "Run tests matching regular expression.")
-      ("json,j",
-            boost::program_options::value<std::string>(),
-            "JSON output file")
-      ;
-
-    // Handle the options that were given
-    boost::program_options::variables_map vm;
-    bool benchmark_mode;
-    std::string kernel_regex;
-    bool store_results = true;
-    std::ofstream json_file;
-
-    try {
-        
boost::program_options::store(boost::program_options::parse_command_line(argc, 
argv, desc), vm);
-        boost::program_options::notify(vm);
-        benchmark_mode = 
vm.count("benchmark")?vm["benchmark"].as<bool>():false;
-        if ( vm.count("tests-regex" ) ) {
-            kernel_regex = vm["tests-regex"].as<std::string>();
-            store_results = false;
-            std::cout << "Warning: using a regexp will not save results to a 
config" << std::endl;
-        }
-        else {
-            kernel_regex = ".*";
-            store_results = true;
-        }
-    } catch (boost::program_options::error& error) {
-        std::cerr << "Error: " << error.what() << std::endl << std::endl;
-        std::cerr << desc << std::endl;
-        return 1;
-    }
-    /** --help option
-*/
-    if ( vm.count("help") )
-    {
-      std::cout << "The VOLK profiler." << std::endl
-                << desc << std::endl;
-      return 0;
-    }
-
-    if ( vm.count("json") )
-    {
-        json_file.open( vm["json"].as<std::string>().c_str() );
-    }
-
-
-    // Run tests
-    std::vector<volk_test_results_t> results;
-
-    //VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_16i_branch_4_state_8, 1e-4, 2046, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PUPPET_PROFILE(volk_8u_conv_k7_r2puppet_8u, volk_8u_x4_conv_k7_r2_8u, 
0, 0, 2060, 10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PUPPET_PROFILE(volk_32fc_s32fc_rotatorpuppet_32fc, 
volk_32fc_s32fc_x2_rotator_32fc, 1e-2, (lv_32fc_t)lv_cmake(0.953939201, 0.3), 
20462, 10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16ic_s32f_deinterleave_real_32f, 1e-5, 32768.0, 204602, 
10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16ic_deinterleave_real_8i, 0, 0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16ic_deinterleave_16i_x2, 0, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16ic_s32f_deinterleave_32f_x2, 1e-4, 32768.0, 204602, 
1000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16ic_deinterleave_real_16i, 0, 0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16ic_magnitude_16i, 1, 0, 204602, 100, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16ic_s32f_magnitude_32f, 1e-5, 32768.0, 204602, 1000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16i_s32f_convert_32f, 1e-4, 32768.0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16i_convert_8i, 0, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_16i_max_star_16i, 0, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_16i_permute_and_scalar_add, 1e-4, 0, 2046, 10000, 
&results, benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PUPPET_PROFILE(volk_16u_byteswappuppet_16u, volk_16u_byteswap, 0, 0, 
204602, 10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_16i_32fc_dot_prod_32fc, 1e-4, 0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_accumulator_s32f, 1e-4, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_add_32f, 1e-4, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_32f_multiply_32fc, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_log2_32f, 1.5e-1, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_expfast_32f, 1e-1, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_pow_32f, 1e-2, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_sin_32f, 1e-6, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_cos_32f, 1e-6, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_tan_32f, 1e-6, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_atan_32f, 1e-3, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_asin_32f, 1e-3, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_acos_32f, 1e-3, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_s32f_power_32fc, 1e-4, 0, 204602, 50, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_s32f_calc_spectral_noise_floor_32f, 1e-4, 20.0, 
204602, 1000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_s32f_atan2_32f, 1e-4, 10.0, 204602, 100, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 204602, 100, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_deinterleave_32f_x2, 1e-4, 0, 204602, 1000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_deinterleave_64f_x2, 1e-4, 0, 204602, 1000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_s32f_deinterleave_real_16i, 0, 32768, 204602, 
10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_deinterleave_imag_32f, 1e-4, 0, 204602, 5000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_deinterleave_real_32f, 1e-4, 0, 204602, 5000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_deinterleave_real_64f, 1e-4, 0, 204602, 1000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_x2_dot_prod_32fc, 1e-4, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_32f_dot_prod_32fc, 1e-4, 0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_index_max_16u, 3, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_s32f_magnitude_16i, 1, 32768, 204602, 100, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_magnitude_32f, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_magnitude_squared_32f, 1e-4, 0, 204602, 1000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_x2_multiply_32fc, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_x2_multiply_conjugate_32fc, 1e-4, 0, 204602, 1000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_conjugate_32fc, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_s32f_convert_16i, 1, 32768, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_s32f_convert_32i, 1, 1<<31, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_convert_64f, 1e-4, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_s32f_convert_8i, 1, 128, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 
10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_s32f_power_spectrum_32f, 1e-4, 0, 20462, 100, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_x2_square_dist_32f, 1e-4, 0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_x2_s32f_square_dist_scalar_mult_32f, 1e-4, 10, 
204602, 10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_divide_32f, 1e-4, 0, 204602, 2000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_dot_prod_32f, 1e-4, 0, 204602, 5000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_dot_prod_16i, 1e-4, 0, 204602, 5000, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_index_max_16u, 3, 0, 204602, 5000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_s32f_interleave_16ic, 1, 32768, 204602, 3000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_interleave_32fc, 0, 0, 204602, 5000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_max_32f, 1e-4, 0, 204602, 2000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_min_32f, 1e-4, 0, 204602, 2000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_multiply_32f, 1e-4, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_s32f_normalize, 1e-4, 100, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_s32f_power_32f, 1e-4, 4, 204602, 100, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_sqrt_32f, 1e-4, 0, 204602, 100, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_s32f_stddev_32f, 1e-4, 100, 204602, 3000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_stddev_and_mean_32f_x2, 1e-4, 0, 204602, 3000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x2_subtract_32f, 1e-4, 0, 204602, 5000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_x3_sum_of_poly_32f, 1e-2, 0, 204602, 5000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32i_x2_and_32i, 0, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32i_s32f_convert_32f, 1e-4, 100, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32i_x2_or_32i, 0, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PUPPET_PROFILE(volk_32u_byteswappuppet_32u, volk_32u_byteswap, 0, 0, 
204602, 2000, &results, benchmark_mode, kernel_regex);
-    VOLK_PUPPET_PROFILE(volk_32u_popcntpuppet_32u, volk32u_popcnt_32u,  0, 0, 
2046, 10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_64f_convert_32f, 1e-4, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_64f_x2_max_64f, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_64f_x2_min_64f, 1e-4, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PUPPET_PROFILE(volk_64u_byteswappuppet_64u, volk_64u_byteswap, 0, 0, 
204602, 1000, &results, benchmark_mode, kernel_regex);
-    VOLK_PUPPET_PROFILE(volk_64u_popcntpuppet_64u, volk_64u_popcnt, 0, 0, 
2046, 10000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8ic_deinterleave_16i_x2, 0, 0, 204602, 3000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8ic_s32f_deinterleave_32f_x2, 1e-4, 100, 204602, 3000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8ic_deinterleave_real_16i, 0, 256, 204602, 3000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8ic_s32f_deinterleave_real_32f, 1e-4, 100, 204602, 3000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8ic_deinterleave_real_8i, 0, 0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8ic_x2_multiply_conjugate_16ic, 0, 0, 204602, 400, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8ic_x2_s32f_multiply_conjugate_32fc, 1e-4, 100, 204602, 
400, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8i_convert_16i, 0, 0, 204602, 20000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_8i_s32f_convert_32f, 1e-4, 100, 204602, 2000, &results, 
benchmark_mode, kernel_regex);
-    //VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc, 1e-4, lv_32fc_t(1.0, 0.5), 
204602, 1000, &results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 204602, 1000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_s32f_multiply_32f, 1e-4, 1.0, 204602, 10000, 
&results, benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_binary_slicer_32i, 0, 1.0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_binary_slicer_8i, 0, 1.0, 204602, 10000, &results, 
benchmark_mode, kernel_regex);
-    VOLK_PROFILE(volk_32f_tanh_32f, 1e-6, 0, 204602, 1000, &results, 
benchmark_mode, kernel_regex);
-
-    // Until we can update the config on a kernel by kernel basis
-    // do not overwrite volk_config when using a regex.
-    if(store_results) {
-        char path[1024];
-        volk_get_config_path(path);
-
-        const fs::path config_path(path);
-
-        if (not fs::exists(config_path.branch_path()))
-        {
-            std::cout << "Creating " << config_path.branch_path() << "..." << 
std::endl;
-            fs::create_directories(config_path.branch_path());
-        }
-
-        std::cout << "Writing " << config_path << "..." << std::endl;
-        std::ofstream config(config_path.string().c_str());
-        if(!config.is_open()) { //either we don't have write access or we 
don't have the dir yet
-            std::cout << "Error opening file " << config_path << std::endl;
-        }
-
-        config << "\
-#this file is generated by volk_profile.\n\
-#the function name is followed by the preferred architecture.\n\
-";
-
-        BOOST_FOREACH(volk_test_results_t result, results) {
-            config << result.config_name << " "
-                << result.best_arch_a << " "
-                << result.best_arch_u << std::endl;
-        }
-        config.close();
-    }
-    else {
-        std::cout << "Warning: config not generated" << std::endl;
-    }
-}
diff --git a/volk/cmake/CMakeParseArgumentsCopy.cmake 
b/volk/cmake/CMakeParseArgumentsCopy.cmake
deleted file mode 100644
index 7ce4c49..0000000
--- a/volk/cmake/CMakeParseArgumentsCopy.cmake
+++ /dev/null
@@ -1,138 +0,0 @@
-# CMAKE_PARSE_ARGUMENTS(<prefix> <options> <one_value_keywords> 
<multi_value_keywords> args...)
-#
-# CMAKE_PARSE_ARGUMENTS() is intended to be used in macros or functions for
-# parsing the arguments given to that macro or function.
-# It processes the arguments and defines a set of variables which hold the
-# values of the respective options.
-#
-# The <options> argument contains all options for the respective macro,
-# i.e. keywords which can be used when calling the macro without any value
-# following, like e.g. the OPTIONAL keyword of the install() command.
-#
-# The <one_value_keywords> argument contains all keywords for this macro
-# which are followed by one value, like e.g. DESTINATION keyword of the
-# install() command.
-#
-# The <multi_value_keywords> argument contains all keywords for this macro
-# which can be followed by more than one value, like e.g. the TARGETS or
-# FILES keywords of the install() command.
-#
-# When done, CMAKE_PARSE_ARGUMENTS() will have defined for each of the
-# keywords listed in <options>, <one_value_keywords> and
-# <multi_value_keywords> a variable composed of the given <prefix>
-# followed by "_" and the name of the respective keyword.
-# These variables will then hold the respective value from the argument list.
-# For the <options> keywords this will be TRUE or FALSE.
-#
-# All remaining arguments are collected in a variable
-# <prefix>_UNPARSED_ARGUMENTS, this can be checked afterwards to see whether
-# your macro was called with unrecognized parameters.
-#
-# As an example here a my_install() macro, which takes similar arguments as the
-# real install() command:
-#
-#   function(MY_INSTALL)
-#     set(options OPTIONAL FAST)
-#     set(oneValueArgs DESTINATION RENAME)
-#     set(multiValueArgs TARGETS CONFIGURATIONS)
-#     cmake_parse_arguments(MY_INSTALL "${options}" "${oneValueArgs}" 
"${multiValueArgs}" ${ARGN} )
-#     ...
-#
-# Assume my_install() has been called like this:
-#   my_install(TARGETS foo bar DESTINATION bin OPTIONAL blub)
-#
-# After the cmake_parse_arguments() call the macro will have set the following
-# variables:
-#   MY_INSTALL_OPTIONAL = TRUE
-#   MY_INSTALL_FAST = FALSE (this option was not used when calling my_install()
-#   MY_INSTALL_DESTINATION = "bin"
-#   MY_INSTALL_RENAME = "" (was not used)
-#   MY_INSTALL_TARGETS = "foo;bar"
-#   MY_INSTALL_CONFIGURATIONS = "" (was not used)
-#   MY_INSTALL_UNPARSED_ARGUMENTS = "blub" (no value expected after "OPTIONAL"
-#
-# You can the continue and process these variables.
-#
-# Keywords terminate lists of values, e.g. if directly after a 
one_value_keyword
-# another recognized keyword follows, this is interpreted as the beginning of
-# the new option.
-# E.g. my_install(TARGETS foo DESTINATION OPTIONAL) would result in
-# MY_INSTALL_DESTINATION set to "OPTIONAL", but MY_INSTALL_DESTINATION would
-# be empty and MY_INSTALL_OPTIONAL would be set to TRUE therefor.
-
-#=============================================================================
-# Copyright 2010 Alexander Neundorf <address@hidden>
-#
-# Distributed under the OSI-approved BSD License (the "License");
-# see accompanying file Copyright.txt for details.
-#
-# This software is distributed WITHOUT ANY WARRANTY; without even the
-# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-# See the License for more information.
-#=============================================================================
-# (To distribute this file outside of CMake, substitute the full
-#  License text for the above reference.)
-
-
-if(__CMAKE_PARSE_ARGUMENTS_INCLUDED)
-  return()
-endif()
-set(__CMAKE_PARSE_ARGUMENTS_INCLUDED TRUE)
-
-
-function(CMAKE_PARSE_ARGUMENTS prefix _optionNames _singleArgNames 
_multiArgNames)
-  # first set all result variables to empty/FALSE
-  foreach(arg_name ${_singleArgNames} ${_multiArgNames})
-    set(${prefix}_${arg_name})
-  endforeach(arg_name)
-
-  foreach(option ${_optionNames})
-    set(${prefix}_${option} FALSE)
-  endforeach(option)
-
-  set(${prefix}_UNPARSED_ARGUMENTS)
-
-  set(insideValues FALSE)
-  set(currentArgName)
-
-  # now iterate over all arguments and fill the result variables
-  foreach(currentArg ${ARGN})
-    list(FIND _optionNames "${currentArg}" optionIndex)  # ... then this marks 
the end of the arguments belonging to this keyword
-    list(FIND _singleArgNames "${currentArg}" singleArgIndex)  # ... then this 
marks the end of the arguments belonging to this keyword
-    list(FIND _multiArgNames "${currentArg}" multiArgIndex)  # ... then this 
marks the end of the arguments belonging to this keyword
-
-    if(${optionIndex} EQUAL -1  AND  ${singleArgIndex} EQUAL -1  AND  
${multiArgIndex} EQUAL -1)
-      if(insideValues)
-        if("${insideValues}" STREQUAL "SINGLE")
-          set(${prefix}_${currentArgName} ${currentArg})
-          set(insideValues FALSE)
-        elseif("${insideValues}" STREQUAL "MULTI")
-          list(APPEND ${prefix}_${currentArgName} ${currentArg})
-        endif()
-      else(insideValues)
-        list(APPEND ${prefix}_UNPARSED_ARGUMENTS ${currentArg})
-      endif(insideValues)
-    else()
-      if(NOT ${optionIndex} EQUAL -1)
-        set(${prefix}_${currentArg} TRUE)
-        set(insideValues FALSE)
-      elseif(NOT ${singleArgIndex} EQUAL -1)
-        set(currentArgName ${currentArg})
-        set(${prefix}_${currentArgName})
-        set(insideValues "SINGLE")
-      elseif(NOT ${multiArgIndex} EQUAL -1)
-        set(currentArgName ${currentArg})
-        set(${prefix}_${currentArgName})
-        set(insideValues "MULTI")
-      endif()
-    endif()
-
-  endforeach(currentArg)
-
-  # propagate the result variables to the caller:
-  foreach(arg_name ${_singleArgNames} ${_multiArgNames} ${_optionNames})
-    set(${prefix}_${arg_name}  ${${prefix}_${arg_name}} PARENT_SCOPE)
-  endforeach(arg_name)
-  set(${prefix}_UNPARSED_ARGUMENTS ${${prefix}_UNPARSED_ARGUMENTS} 
PARENT_SCOPE)
-
-endfunction(CMAKE_PARSE_ARGUMENTS _options _singleArgs _multiArgs)
diff --git a/volk/cmake/FindORC.cmake b/volk/cmake/FindORC.cmake
deleted file mode 100644
index f21513f..0000000
--- a/volk/cmake/FindORC.cmake
+++ /dev/null
@@ -1,36 +0,0 @@
-FIND_PACKAGE(PkgConfig)
-PKG_CHECK_MODULES(PC_ORC "orc-0.4 > 0.4.11")
-
-
-
-
-FIND_PROGRAM(ORCC_EXECUTABLE orcc
-             HINTS ${PC_ORC_TOOLSDIR}
-            PATHS ${ORC_ROOT}/bin ${CMAKE_INSTALL_PREFIX}/bin)
-
-FIND_PATH(ORC_INCLUDE_DIR NAMES orc/orc.h
-          HINTS ${PC_ORC_INCLUDEDIR}
-         PATHS ${ORC_ROOT}/include/orc-0.4 
${CMAKE_INSTALL_PREFIX}/include/orc-0.4)
-
-
-FIND_PATH(ORC_LIBRARY_DIR NAMES 
${CMAKE_SHARED_LIBRARY_PREFIX}orc-0.4${CMAKE_SHARED_LIBRARY_SUFFIX}
-          HINTS ${PC_ORC_LIBDIR}
-         PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} 
${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})
-
-FIND_LIBRARY(ORC_LIB orc-0.4
-             HINTS ${PC_ORC_LIBRARY_DIRS}
-            PATHS ${ORC_ROOT}/lib${LIB_SUFFIX} 
${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX})
-
-LIST(APPEND ORC_LIBRARY
-     ${ORC_LIB}
-)
-
-
-SET(ORC_INCLUDE_DIRS ${ORC_INCLUDE_DIR})
-SET(ORC_LIBRARIES ${ORC_LIBRARY})
-SET(ORC_LIBRARY_DIRS ${ORC_LIBRARY_DIR})
-
-INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(ORC "orc files" ORC_LIBRARY ORC_INCLUDE_DIR 
ORCC_EXECUTABLE)
-
-mark_as_advanced(ORC_INCLUDE_DIR ORC_LIBRARY ORCC_EXECUTABLE)
diff --git a/volk/cmake/GrPython.cmake b/volk/cmake/GrPython.cmake
deleted file mode 100644
index c3e8176..0000000
--- a/volk/cmake/GrPython.cmake
+++ /dev/null
@@ -1,234 +0,0 @@
-# Copyright 2010-2011,2013 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-
-if(DEFINED __INCLUDED_VOLK_PYTHON_CMAKE)
-    return()
-endif()
-set(__INCLUDED_VOLK_PYTHON_CMAKE TRUE)
-
-########################################################################
-# Setup the python interpreter:
-# This allows the user to specify a specific interpreter,
-# or finds the interpreter via the built-in cmake module.
-########################################################################
-#this allows the user to override PYTHON_EXECUTABLE
-if(PYTHON_EXECUTABLE)
-
-    set(PYTHONINTERP_FOUND TRUE)
-
-#otherwise if not set, try to automatically find it
-else(PYTHON_EXECUTABLE)
-
-    #use the built-in find script
-    find_package(PythonInterp 2)
-
-    #and if that fails use the find program routine
-    if(NOT PYTHONINTERP_FOUND)
-        find_program(PYTHON_EXECUTABLE NAMES python python2 python2.7 
python2.6 python2.5)
-        if(PYTHON_EXECUTABLE)
-            set(PYTHONINTERP_FOUND TRUE)
-        endif(PYTHON_EXECUTABLE)
-    endif(NOT PYTHONINTERP_FOUND)
-
-endif(PYTHON_EXECUTABLE)
-
-#make the path to the executable appear in the cmake gui
-set(PYTHON_EXECUTABLE ${PYTHON_EXECUTABLE} CACHE FILEPATH "python interpreter")
-
-#make sure we can use -B with python (introduced in 2.6)
-if(PYTHON_EXECUTABLE)
-    execute_process(
-        COMMAND ${PYTHON_EXECUTABLE} -B -c ""
-        OUTPUT_QUIET ERROR_QUIET
-        RESULT_VARIABLE PYTHON_HAS_DASH_B_RESULT
-    )
-    if(PYTHON_HAS_DASH_B_RESULT EQUAL 0)
-        set(PYTHON_DASH_B "-B")
-    endif()
-endif(PYTHON_EXECUTABLE)
-
-########################################################################
-# Check for the existence of a python module:
-# - desc a string description of the check
-# - mod the name of the module to import
-# - cmd an additional command to run
-# - have the result variable to set
-########################################################################
-macro(VOLK_PYTHON_CHECK_MODULE desc mod cmd have)
-    message(STATUS "")
-    message(STATUS "Python checking for ${desc}")
-    execute_process(
-        COMMAND ${PYTHON_EXECUTABLE} -c "
-#########################################
-try: import ${mod}
-except:
-    try: ${mod}
-    except: exit(-1)
-try: assert ${cmd}
-except: exit(-1)
-#########################################"
-        RESULT_VARIABLE ${have}
-    )
-    if(${have} EQUAL 0)
-        message(STATUS "Python checking for ${desc} - found")
-        set(${have} TRUE)
-    else(${have} EQUAL 0)
-        message(STATUS "Python checking for ${desc} - not found")
-        set(${have} FALSE)
-    endif(${have} EQUAL 0)
-endmacro(VOLK_PYTHON_CHECK_MODULE)
-
-########################################################################
-# Sets the python installation directory VOLK_PYTHON_DIR
-########################################################################
-execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "
-from distutils import sysconfig
-print sysconfig.get_python_lib(plat_specific=True, prefix='')
-" OUTPUT_VARIABLE VOLK_PYTHON_DIR OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-file(TO_CMAKE_PATH ${VOLK_PYTHON_DIR} VOLK_PYTHON_DIR)
-
-########################################################################
-# Create an always-built target with a unique name
-# Usage: VOLK_UNIQUE_TARGET(<description> <dependencies list>)
-########################################################################
-function(VOLK_UNIQUE_TARGET desc)
-    file(RELATIVE_PATH reldir ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR})
-    execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import re, hashlib
-unique = hashlib.md5('${reldir}${ARGN}').hexdigest()[:5]
-print(re.sub('\\W', '_', '${desc} ${reldir} ' + unique))"
-    OUTPUT_VARIABLE _target OUTPUT_STRIP_TRAILING_WHITESPACE)
-    add_custom_target(${_target} ALL DEPENDS ${ARGN})
-endfunction(VOLK_UNIQUE_TARGET)
-
-########################################################################
-# Install python sources (also builds and installs byte-compiled python)
-########################################################################
-function(VOLK_PYTHON_INSTALL)
-    include(CMakeParseArgumentsCopy)
-    CMAKE_PARSE_ARGUMENTS(VOLK_PYTHON_INSTALL "" "DESTINATION;COMPONENT" 
"FILES;PROGRAMS" ${ARGN})
-
-    ####################################################################
-    if(VOLK_PYTHON_INSTALL_FILES)
-    ####################################################################
-        install(${ARGN}) #installs regular python files
-
-        #create a list of all generated files
-        unset(pysrcfiles)
-        unset(pycfiles)
-        unset(pyofiles)
-        foreach(pyfile ${VOLK_PYTHON_INSTALL_FILES})
-            get_filename_component(pyfile ${pyfile} ABSOLUTE)
-            list(APPEND pysrcfiles ${pyfile})
-
-            #determine if this file is in the source or binary directory
-            file(RELATIVE_PATH source_rel_path ${CMAKE_CURRENT_SOURCE_DIR} 
${pyfile})
-            string(LENGTH "${source_rel_path}" source_rel_path_len)
-            file(RELATIVE_PATH binary_rel_path ${CMAKE_CURRENT_BINARY_DIR} 
${pyfile})
-            string(LENGTH "${binary_rel_path}" binary_rel_path_len)
-
-            #and set the generated path appropriately
-            if(${source_rel_path_len} GREATER ${binary_rel_path_len})
-                set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${binary_rel_path})
-            else()
-                set(pygenfile ${CMAKE_CURRENT_BINARY_DIR}/${source_rel_path})
-            endif()
-            list(APPEND pycfiles ${pygenfile}c)
-            list(APPEND pyofiles ${pygenfile}o)
-
-            #ensure generation path exists
-            get_filename_component(pygen_path ${pygenfile} PATH)
-            file(MAKE_DIRECTORY ${pygen_path})
-
-        endforeach(pyfile)
-
-        #the command to generate the pyc files
-        add_custom_command(
-            DEPENDS ${pysrcfiles} OUTPUT ${pycfiles}
-            COMMAND ${PYTHON_EXECUTABLE} 
${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pycfiles}
-        )
-
-        #the command to generate the pyo files
-        add_custom_command(
-            DEPENDS ${pysrcfiles} OUTPUT ${pyofiles}
-            COMMAND ${PYTHON_EXECUTABLE} -O 
${CMAKE_BINARY_DIR}/python_compile_helper.py ${pysrcfiles} ${pyofiles}
-        )
-
-        #create install rule and add generated files to target list
-        set(python_install_gen_targets ${pycfiles} ${pyofiles})
-        install(FILES ${python_install_gen_targets}
-            DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
-            COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
-        )
-
-
-    ####################################################################
-    elseif(VOLK_PYTHON_INSTALL_PROGRAMS)
-    ####################################################################
-        file(TO_NATIVE_PATH ${PYTHON_EXECUTABLE} pyexe_native)
-
-        if (CMAKE_CROSSCOMPILING)
-           set(pyexe_native "/usr/bin/env python")
-        endif()
-
-        foreach(pyfile ${VOLK_PYTHON_INSTALL_PROGRAMS})
-            get_filename_component(pyfile_name ${pyfile} NAME)
-            get_filename_component(pyfile ${pyfile} ABSOLUTE)
-            string(REPLACE "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" 
pyexefile "${pyfile}.exe")
-            list(APPEND python_install_gen_targets ${pyexefile})
-
-            get_filename_component(pyexefile_path ${pyexefile} PATH)
-            file(MAKE_DIRECTORY ${pyexefile_path})
-
-            add_custom_command(
-                OUTPUT ${pyexefile} DEPENDS ${pyfile}
-                COMMAND ${PYTHON_EXECUTABLE} -c
-                "import re; R=re.compile('^\#!.*$\\n',flags=re.MULTILINE); 
open('${pyexefile}','w').write('\#!${pyexe_native}\\n'+R.sub('',open('${pyfile}','r').read()))"
-                COMMENT "Shebangin ${pyfile_name}"
-                VERBATIM
-            )
-
-            #on windows, python files need an extension to execute
-            get_filename_component(pyfile_ext ${pyfile} EXT)
-            if(WIN32 AND NOT pyfile_ext)
-                set(pyfile_name "${pyfile_name}.py")
-            endif()
-
-            install(PROGRAMS ${pyexefile} RENAME ${pyfile_name}
-                DESTINATION ${VOLK_PYTHON_INSTALL_DESTINATION}
-                COMPONENT ${VOLK_PYTHON_INSTALL_COMPONENT}
-            )
-        endforeach(pyfile)
-
-    endif()
-
-    VOLK_UNIQUE_TARGET("pygen" ${python_install_gen_targets})
-
-endfunction(VOLK_PYTHON_INSTALL)
-
-########################################################################
-# Write the python helper script that generates byte code files
-########################################################################
-file(WRITE ${CMAKE_BINARY_DIR}/python_compile_helper.py "
-import sys, py_compile
-files = sys.argv[1:]
-srcs, gens = files[:len(files)/2], files[len(files)/2:]
-for src, gen in zip(srcs, gens):
-    py_compile.compile(file=src, cfile=gen, doraise=True)
-")
diff --git a/volk/cmake/VolkBoost.cmake b/volk/cmake/VolkBoost.cmake
deleted file mode 100644
index 9afa9f8..0000000
--- a/volk/cmake/VolkBoost.cmake
+++ /dev/null
@@ -1,98 +0,0 @@
-# Copyright 2010-2011 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-
-if(DEFINED __INCLUDED_VOLK_BOOST_CMAKE)
-    return()
-endif()
-set(__INCLUDED_VOLK_BOOST_CMAKE TRUE)
-
-########################################################################
-# Setup Boost and handle some system specific things
-########################################################################
-
-set(BOOST_REQUIRED_COMPONENTS
-    filesystem
-    system
-    unit_test_framework
-    program_options
-)
-
-if(UNIX AND NOT BOOST_ROOT AND EXISTS "/usr/lib64")
-    list(APPEND BOOST_LIBRARYDIR "/usr/lib64") #fedora 64-bit fix
-endif(UNIX AND NOT BOOST_ROOT AND EXISTS "/usr/lib64")
-
-if(MSVC)
-    set(BOOST_REQUIRED_COMPONENTS ${BOOST_REQUIRED_COMPONENTS} chrono)
-
-    if (NOT DEFINED BOOST_ALL_DYN_LINK)
-        set(BOOST_ALL_DYN_LINK TRUE)
-    endif()
-    set(BOOST_ALL_DYN_LINK "${BOOST_ALL_DYN_LINK}" CACHE BOOL "boost enable 
dynamic linking")
-    if(BOOST_ALL_DYN_LINK)
-        add_definitions(-DBOOST_ALL_DYN_LINK) #setup boost auto-linking in msvc
-    else(BOOST_ALL_DYN_LINK)
-        unset(BOOST_REQUIRED_COMPONENTS) #empty components list for static link
-    endif(BOOST_ALL_DYN_LINK)
-endif(MSVC)
-
-find_package(Boost "1.35" COMPONENTS ${BOOST_REQUIRED_COMPONENTS})
-
-# This does not allow us to disable specific versions. It is used
-# internally by cmake to know the formation newer versions. As newer
-# Boost version beyond what is shown here are produced, we must extend
-# this list. To disable Boost versions, see below.
-set(Boost_ADDITIONAL_VERSIONS
-    "1.35.0" "1.35" "1.36.0" "1.36" "1.37.0" "1.37" "1.38.0" "1.38" "1.39.0" 
"1.39"
-    "1.40.0" "1.40" "1.41.0" "1.41" "1.42.0" "1.42" "1.43.0" "1.43" "1.44.0" 
"1.44"
-    "1.45.0" "1.45" "1.46.0" "1.46" "1.47.0" "1.47" "1.48.0" "1.48" "1.49.0" 
"1.49"
-    "1.50.0" "1.50" "1.51.0" "1.51" "1.52.0" "1.52" "1.53.0" "1.53" "1.54.0" 
"1.54"
-    "1.55.0" "1.55" "1.56.0" "1.56" "1.57.0" "1.57" "1.58.0" "1.58" "1.59.0" 
"1.59"
-    "1.60.0" "1.60" "1.61.0" "1.61" "1.62.0" "1.62" "1.63.0" "1.63" "1.64.0" 
"1.64"
-    "1.65.0" "1.65" "1.66.0" "1.66" "1.67.0" "1.67" "1.68.0" "1.68" "1.69.0" 
"1.69"
-)
-
-# Boost 1.52 disabled, see https://svn.boost.org/trac/boost/ticket/7669
-# Similar problems with Boost 1.46 and 1.47.
-
-OPTION(ENABLE_BAD_BOOST "Enable known bad versions of Boost" OFF)
-if(ENABLE_BAD_BOOST)
-  MESSAGE(STATUS "Enabling use of known bad versions of Boost.")
-endif(ENABLE_BAD_BOOST)
-
-# For any unsuitable Boost version, add the version number below in
-# the following format: XXYYZZ
-# Where:
-#     XX is the major version ('10' for version 1)
-#     YY is the minor version number ('46' for 1.46)
-#     ZZ is the patcher version number (typically just '00')
-set(Boost_NOGO_VERSIONS
-  104600 104601 104700 105200
-  )
-
-foreach(ver ${Boost_NOGO_VERSIONS})
-  if("${Boost_VERSION}" STREQUAL "${ver}")
-    if(NOT ENABLE_BAD_BOOST)
-      MESSAGE(STATUS "WARNING: Found a known bad version of Boost 
(v${Boost_VERSION}). Disabling.")
-      set(Boost_FOUND FALSE)
-    else(NOT ENABLE_BAD_BOOST)
-      MESSAGE(STATUS "WARNING: Found a known bad version of Boost 
(v${Boost_VERSION}). Continuing anyway.")
-      set(Boost_FOUND TRUE)
-    endif(NOT ENABLE_BAD_BOOST)
-  endif("${Boost_VERSION}" STREQUAL "${ver}")
-endforeach(ver)
diff --git a/volk/cmake/VolkConfig.cmake b/volk/cmake/VolkConfig.cmake
deleted file mode 100644
index 425eb01..0000000
--- a/volk/cmake/VolkConfig.cmake
+++ /dev/null
@@ -1,26 +0,0 @@
-INCLUDE(FindPkgConfig)
-PKG_CHECK_MODULES(PC_VOLK volk)
-
-FIND_PATH(
-    VOLK_INCLUDE_DIRS
-    NAMES volk/volk.h
-    HINTS $ENV{VOLK_DIR}/include
-        ${PC_VOLK_INCLUDEDIR}
-    PATHS /usr/local/include
-          /usr/include
-)
-
-FIND_LIBRARY(
-    VOLK_LIBRARIES
-    NAMES volk
-    HINTS $ENV{VOLK_DIR}/lib
-        ${PC_VOLK_LIBDIR}
-    PATHS /usr/local/lib
-          /usr/local/lib64
-          /usr/lib
-          /usr/lib64
-)
-
-INCLUDE(FindPackageHandleStandardArgs)
-FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES 
VOLK_INCLUDE_DIRS)
-MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
diff --git a/volk/cmake/msvc/config.h b/volk/cmake/msvc/config.h
deleted file mode 100644
index 43792c7..0000000
--- a/volk/cmake/msvc/config.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_CONFIG_H_ // [
-#define _MSC_CONFIG_H_
-
-////////////////////////////////////////////////////////////////////////
-// enable inline functions for C code
-////////////////////////////////////////////////////////////////////////
-#ifndef __cplusplus
-#  define inline __inline
-#endif
-
-////////////////////////////////////////////////////////////////////////
-// signed size_t
-////////////////////////////////////////////////////////////////////////
-#include <stddef.h>
-typedef ptrdiff_t ssize_t;
-
-////////////////////////////////////////////////////////////////////////
-// rint functions
-////////////////////////////////////////////////////////////////////////
-#include <math.h>
-static inline long lrint(double x){return (long)(x > 0.0 ? x + 0.5 : x - 0.5);}
-static inline long lrintf(float x){return (long)(x > 0.0f ? x + 0.5f : x - 
0.5f);}
-static inline long long llrint(double x){return (long long)(x > 0.0 ? x + 0.5 
: x - 0.5);}
-static inline long long llrintf(float x){return (long long)(x > 0.0f ? x + 
0.5f : x - 0.5f);}
-static inline double rint(double x){return (x > 0.0)? floor(x + 0.5) : ceil(x 
- 0.5);}
-static inline float rintf(float x){return (x > 0.0f)? floorf(x + 0.5f) : 
ceilf(x - 0.5f);}
-
-////////////////////////////////////////////////////////////////////////
-// math constants
-////////////////////////////////////////////////////////////////////////
-#define INFINITY HUGE_VAL
-
-# define M_E           2.7182818284590452354   /* e */
-# define M_LOG2E       1.4426950408889634074   /* log_2 e */
-# define M_LOG10E      0.43429448190325182765  /* log_10 e */
-# define M_LN2         0.69314718055994530942  /* log_e 2 */
-# define M_LN10                2.30258509299404568402  /* log_e 10 */
-# define M_PI          3.14159265358979323846  /* pi */
-# define M_PI_2                1.57079632679489661923  /* pi/2 */
-# define M_PI_4                0.78539816339744830962  /* pi/4 */
-# define M_1_PI                0.31830988618379067154  /* 1/pi */
-# define M_2_PI                0.63661977236758134308  /* 2/pi */
-# define M_2_SQRTPI    1.12837916709551257390  /* 2/sqrt(pi) */
-# define M_SQRT2       1.41421356237309504880  /* sqrt(2) */
-# define M_SQRT1_2     0.70710678118654752440  /* 1/sqrt(2) */
-
-////////////////////////////////////////////////////////////////////////
-// random and srandom
-////////////////////////////////////////////////////////////////////////
-#include <stdlib.h>
-static inline long int random (void) { return rand(); }
-static inline void srandom (unsigned int seed) { srand(seed); }
-
-#endif // _MSC_CONFIG_H_ ]
diff --git a/volk/cmake/msvc/inttypes.h b/volk/cmake/msvc/inttypes.h
deleted file mode 100644
index 0a1b60f..0000000
--- a/volk/cmake/msvc/inttypes.h
+++ /dev/null
@@ -1,301 +0,0 @@
-// ISO C9x  compliant inttypes.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
-//
-//  Copyright (c) 2006 Alexander Chemeris
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//   1. Redistributions of source code must retain the above copyright notice,
-//      this list of conditions and the following disclaimer.
-//
-//   2. Redistributions in binary form must reproduce the above copyright
-//      notice, this list of conditions and the following disclaimer in the
-//      documentation and/or other materials provided with the distribution.
-//
-//   3. The name of the author may be used to endorse or promote products
-//      derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_INTTYPES_H_ // [
-#define _MSC_INTTYPES_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-#include <stdint.h>
-
-// 7.8 Format conversion of integer types
-
-typedef struct {
-   intmax_t quot;
-   intmax_t rem;
-} imaxdiv_t;
-
-// 7.8.1 Macros for format specifiers
-
-// The fprintf macros for signed integers are:
-#define PRId8       "d"
-#define PRIi8       "i"
-#define PRIdLEAST8  "d"
-#define PRIiLEAST8  "i"
-#define PRIdFAST8   "d"
-#define PRIiFAST8   "i"
-
-#define PRId16       "hd"
-#define PRIi16       "hi"
-#define PRIdLEAST16  "hd"
-#define PRIiLEAST16  "hi"
-#define PRIdFAST16   "hd"
-#define PRIiFAST16   "hi"
-
-#define PRId32       "I32d"
-#define PRIi32       "I32i"
-#define PRIdLEAST32  "I32d"
-#define PRIiLEAST32  "I32i"
-#define PRIdFAST32   "I32d"
-#define PRIiFAST32   "I32i"
-
-#define PRId64       "I64d"
-#define PRIi64       "I64i"
-#define PRIdLEAST64  "I64d"
-#define PRIiLEAST64  "I64i"
-#define PRIdFAST64   "I64d"
-#define PRIiFAST64   "I64i"
-
-#define PRIdMAX     "I64d"
-#define PRIiMAX     "I64i"
-
-#define PRIdPTR     "Id"
-#define PRIiPTR     "Ii"
-
-// The fprintf macros for unsigned integers are:
-#define PRIo8       "o"
-#define PRIu8       "u"
-#define PRIx8       "x"
-#define PRIX8       "X"
-#define PRIoLEAST8  "o"
-#define PRIuLEAST8  "u"
-#define PRIxLEAST8  "x"
-#define PRIXLEAST8  "X"
-#define PRIoFAST8   "o"
-#define PRIuFAST8   "u"
-#define PRIxFAST8   "x"
-#define PRIXFAST8   "X"
-
-#define PRIo16       "ho"
-#define PRIu16       "hu"
-#define PRIx16       "hx"
-#define PRIX16       "hX"
-#define PRIoLEAST16  "ho"
-#define PRIuLEAST16  "hu"
-#define PRIxLEAST16  "hx"
-#define PRIXLEAST16  "hX"
-#define PRIoFAST16   "ho"
-#define PRIuFAST16   "hu"
-#define PRIxFAST16   "hx"
-#define PRIXFAST16   "hX"
-
-#define PRIo32       "I32o"
-#define PRIu32       "I32u"
-#define PRIx32       "I32x"
-#define PRIX32       "I32X"
-#define PRIoLEAST32  "I32o"
-#define PRIuLEAST32  "I32u"
-#define PRIxLEAST32  "I32x"
-#define PRIXLEAST32  "I32X"
-#define PRIoFAST32   "I32o"
-#define PRIuFAST32   "I32u"
-#define PRIxFAST32   "I32x"
-#define PRIXFAST32   "I32X"
-
-#define PRIo64       "I64o"
-#define PRIu64       "I64u"
-#define PRIx64       "I64x"
-#define PRIX64       "I64X"
-#define PRIoLEAST64  "I64o"
-#define PRIuLEAST64  "I64u"
-#define PRIxLEAST64  "I64x"
-#define PRIXLEAST64  "I64X"
-#define PRIoFAST64   "I64o"
-#define PRIuFAST64   "I64u"
-#define PRIxFAST64   "I64x"
-#define PRIXFAST64   "I64X"
-
-#define PRIoMAX     "I64o"
-#define PRIuMAX     "I64u"
-#define PRIxMAX     "I64x"
-#define PRIXMAX     "I64X"
-
-#define PRIoPTR     "Io"
-#define PRIuPTR     "Iu"
-#define PRIxPTR     "Ix"
-#define PRIXPTR     "IX"
-
-// The fscanf macros for signed integers are:
-#define SCNd8       "d"
-#define SCNi8       "i"
-#define SCNdLEAST8  "d"
-#define SCNiLEAST8  "i"
-#define SCNdFAST8   "d"
-#define SCNiFAST8   "i"
-
-#define SCNd16       "hd"
-#define SCNi16       "hi"
-#define SCNdLEAST16  "hd"
-#define SCNiLEAST16  "hi"
-#define SCNdFAST16   "hd"
-#define SCNiFAST16   "hi"
-
-#define SCNd32       "ld"
-#define SCNi32       "li"
-#define SCNdLEAST32  "ld"
-#define SCNiLEAST32  "li"
-#define SCNdFAST32   "ld"
-#define SCNiFAST32   "li"
-
-#define SCNd64       "I64d"
-#define SCNi64       "I64i"
-#define SCNdLEAST64  "I64d"
-#define SCNiLEAST64  "I64i"
-#define SCNdFAST64   "I64d"
-#define SCNiFAST64   "I64i"
-
-#define SCNdMAX     "I64d"
-#define SCNiMAX     "I64i"
-
-#ifdef _WIN64 // [
-#  define SCNdPTR     "I64d"
-#  define SCNiPTR     "I64i"
-#else  // _WIN64 ][
-#  define SCNdPTR     "ld"
-#  define SCNiPTR     "li"
-#endif  // _WIN64 ]
-
-// The fscanf macros for unsigned integers are:
-#define SCNo8       "o"
-#define SCNu8       "u"
-#define SCNx8       "x"
-#define SCNX8       "X"
-#define SCNoLEAST8  "o"
-#define SCNuLEAST8  "u"
-#define SCNxLEAST8  "x"
-#define SCNXLEAST8  "X"
-#define SCNoFAST8   "o"
-#define SCNuFAST8   "u"
-#define SCNxFAST8   "x"
-#define SCNXFAST8   "X"
-
-#define SCNo16       "ho"
-#define SCNu16       "hu"
-#define SCNx16       "hx"
-#define SCNX16       "hX"
-#define SCNoLEAST16  "ho"
-#define SCNuLEAST16  "hu"
-#define SCNxLEAST16  "hx"
-#define SCNXLEAST16  "hX"
-#define SCNoFAST16   "ho"
-#define SCNuFAST16   "hu"
-#define SCNxFAST16   "hx"
-#define SCNXFAST16   "hX"
-
-#define SCNo32       "lo"
-#define SCNu32       "lu"
-#define SCNx32       "lx"
-#define SCNX32       "lX"
-#define SCNoLEAST32  "lo"
-#define SCNuLEAST32  "lu"
-#define SCNxLEAST32  "lx"
-#define SCNXLEAST32  "lX"
-#define SCNoFAST32   "lo"
-#define SCNuFAST32   "lu"
-#define SCNxFAST32   "lx"
-#define SCNXFAST32   "lX"
-
-#define SCNo64       "I64o"
-#define SCNu64       "I64u"
-#define SCNx64       "I64x"
-#define SCNX64       "I64X"
-#define SCNoLEAST64  "I64o"
-#define SCNuLEAST64  "I64u"
-#define SCNxLEAST64  "I64x"
-#define SCNXLEAST64  "I64X"
-#define SCNoFAST64   "I64o"
-#define SCNuFAST64   "I64u"
-#define SCNxFAST64   "I64x"
-#define SCNXFAST64   "I64X"
-
-#define SCNoMAX     "I64o"
-#define SCNuMAX     "I64u"
-#define SCNxMAX     "I64x"
-#define SCNXMAX     "I64X"
-
-#ifdef _WIN64 // [
-#  define SCNoPTR     "I64o"
-#  define SCNuPTR     "I64u"
-#  define SCNxPTR     "I64x"
-#  define SCNXPTR     "I64X"
-#else  // _WIN64 ][
-#  define SCNoPTR     "lo"
-#  define SCNuPTR     "lu"
-#  define SCNxPTR     "lx"
-#  define SCNXPTR     "lX"
-#endif  // _WIN64 ]
-
-// 7.8.2 Functions for greatest-width integer types
-
-// 7.8.2.1 The imaxabs function
-#define imaxabs _abs64
-
-// 7.8.2.2 The imaxdiv function
-
-// This is modified version of div() function from Microsoft's div.c found
-// in %MSVC.NET%\crt\src\div.c
-#ifdef STATIC_IMAXDIV // [
-static
-#else // STATIC_IMAXDIV ][
-_inline
-#endif // STATIC_IMAXDIV ]
-imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom)
-{
-   imaxdiv_t result;
-
-   result.quot = numer / denom;
-   result.rem = numer % denom;
-
-   if (numer < 0 && result.rem > 0) {
-      // did division wrong; must fix up
-      ++result.quot;
-      result.rem -= denom;
-   }
-
-   return result;
-}
-
-// 7.8.2.3 The strtoimax and strtoumax functions
-#define strtoimax _strtoi64
-#define strtoumax _strtoui64
-
-// 7.8.2.4 The wcstoimax and wcstoumax functions
-#define wcstoimax _wcstoi64
-#define wcstoumax _wcstoui64
-
-
-#endif // _MSC_INTTYPES_H_ ]
diff --git a/volk/cmake/msvc/stdbool.h b/volk/cmake/msvc/stdbool.h
deleted file mode 100644
index ca4581d..0000000
--- a/volk/cmake/msvc/stdbool.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2005, 2006 Apple Computer, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public License
- * along with this library; see the file COPYING.LIB.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- *
- */
-
-#ifndef STDBOOL_WIN32_H
-#define STDBOOL_WIN32_H
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef __cplusplus
-
-typedef unsigned char bool;
-
-#define true 1
-#define false 0
-
-#ifndef CASSERT
-#define CASSERT(exp, name) typedef int dummy##name [(exp) ? 1 : -1];
-#endif
-
-CASSERT(sizeof(bool) == 1, bool_is_one_byte)
-CASSERT(true, true_is_true)
-CASSERT(!false, false_is_false)
-
-#endif
-
-#endif
diff --git a/volk/cmake/msvc/stdint.h b/volk/cmake/msvc/stdint.h
deleted file mode 100644
index 108bc89..0000000
--- a/volk/cmake/msvc/stdint.h
+++ /dev/null
@@ -1,251 +0,0 @@
-// ISO C9x  compliant stdint.h for Microsoft Visual Studio
-// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124
-//
-//  Copyright (c) 2006-2008 Alexander Chemeris
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//   1. Redistributions of source code must retain the above copyright notice,
-//      this list of conditions and the following disclaimer.
-//
-//   2. Redistributions in binary form must reproduce the above copyright
-//      notice, this list of conditions and the following disclaimer in the
-//      documentation and/or other materials provided with the distribution.
-//
-//   3. The name of the author may be used to endorse or promote products
-//      derived from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
-// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
-// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
-// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
-// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-///////////////////////////////////////////////////////////////////////////////
-
-#ifndef _MSC_VER // [
-#error "Use this header only with Microsoft Visual C++ compilers!"
-#endif // _MSC_VER ]
-
-#ifndef _MSC_STDINT_H_ // [
-#define _MSC_STDINT_H_
-
-#if _MSC_VER > 1000
-#pragma once
-#endif
-
-#include <limits.h>
-
-// For Visual Studio 6 in C++ mode and for many Visual Studio versions when
-// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}'
-// or compiler give many errors like this:
-//   error C2733: second C linkage of overloaded function 'wmemchr' not allowed
-#ifdef __cplusplus
-extern "C" {
-#endif
-#  include <wchar.h>
-#ifdef __cplusplus
-}
-#endif
-
-// Define _W64 macros to mark types changing their size, like intptr_t.
-#ifndef _W64
-#  if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 
1300
-#     define _W64 __w64
-#  else
-#     define _W64
-#  endif
-#endif
-
-
-// 7.18.1 Integer types
-
-// 7.18.1.1 Exact-width integer types
-
-// Visual Studio 6 and Embedded Visual C++ 4 doesn't
-// realize that, e.g. char has the same size as __int8
-// so we give up on __intX for them.
-#if (_MSC_VER < 1300)
-   typedef signed char       int8_t;
-   typedef signed short      int16_t;
-   typedef signed int        int32_t;
-   typedef unsigned char     uint8_t;
-   typedef unsigned short    uint16_t;
-   typedef unsigned int      uint32_t;
-#else
-   typedef signed __int8     int8_t;
-   typedef signed __int16    int16_t;
-   typedef signed __int32    int32_t;
-   typedef unsigned __int8   uint8_t;
-   typedef unsigned __int16  uint16_t;
-   typedef unsigned __int32  uint32_t;
-#endif
-typedef signed __int64       int64_t;
-typedef unsigned __int64     uint64_t;
-
-
-// 7.18.1.2 Minimum-width integer types
-typedef int8_t    int_least8_t;
-typedef int16_t   int_least16_t;
-typedef int32_t   int_least32_t;
-typedef int64_t   int_least64_t;
-typedef uint8_t   uint_least8_t;
-typedef uint16_t  uint_least16_t;
-typedef uint32_t  uint_least32_t;
-typedef uint64_t  uint_least64_t;
-
-// 7.18.1.3 Fastest minimum-width integer types
-typedef int8_t    int_fast8_t;
-typedef int16_t   int_fast16_t;
-typedef int32_t   int_fast32_t;
-typedef int64_t   int_fast64_t;
-typedef uint8_t   uint_fast8_t;
-typedef uint16_t  uint_fast16_t;
-typedef uint32_t  uint_fast32_t;
-typedef uint64_t  uint_fast64_t;
-
-// 7.18.1.4 Integer types capable of holding object pointers
-#ifdef _WIN64 // [
-   typedef signed __int64    intptr_t;
-   typedef unsigned __int64  uintptr_t;
-#else // _WIN64 ][
-   typedef _W64 signed int   intptr_t;
-   typedef _W64 unsigned int uintptr_t;
-#endif // _WIN64 ]
-
-// 7.18.1.5 Greatest-width integer types
-typedef int64_t   intmax_t;
-typedef uint64_t  uintmax_t;
-
-
-// 7.18.2 Limits of specified-width integer types
-
-#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [   See footnote 
220 at page 257 and footnote 221 at page 259
-
-// 7.18.2.1 Limits of exact-width integer types
-#define INT8_MIN     ((int8_t)_I8_MIN)
-#define INT8_MAX     _I8_MAX
-#define INT16_MIN    ((int16_t)_I16_MIN)
-#define INT16_MAX    _I16_MAX
-#define INT32_MIN    ((int32_t)_I32_MIN)
-#define INT32_MAX    _I32_MAX
-#define INT64_MIN    ((int64_t)_I64_MIN)
-#define INT64_MAX    _I64_MAX
-#define UINT8_MAX    _UI8_MAX
-#define UINT16_MAX   _UI16_MAX
-#define UINT32_MAX   _UI32_MAX
-#define UINT64_MAX   _UI64_MAX
-
-// 7.18.2.2 Limits of minimum-width integer types
-#define INT_LEAST8_MIN    INT8_MIN
-#define INT_LEAST8_MAX    INT8_MAX
-#define INT_LEAST16_MIN   INT16_MIN
-#define INT_LEAST16_MAX   INT16_MAX
-#define INT_LEAST32_MIN   INT32_MIN
-#define INT_LEAST32_MAX   INT32_MAX
-#define INT_LEAST64_MIN   INT64_MIN
-#define INT_LEAST64_MAX   INT64_MAX
-#define UINT_LEAST8_MAX   UINT8_MAX
-#define UINT_LEAST16_MAX  UINT16_MAX
-#define UINT_LEAST32_MAX  UINT32_MAX
-#define UINT_LEAST64_MAX  UINT64_MAX
-
-// 7.18.2.3 Limits of fastest minimum-width integer types
-#define INT_FAST8_MIN    INT8_MIN
-#define INT_FAST8_MAX    INT8_MAX
-#define INT_FAST16_MIN   INT16_MIN
-#define INT_FAST16_MAX   INT16_MAX
-#define INT_FAST32_MIN   INT32_MIN
-#define INT_FAST32_MAX   INT32_MAX
-#define INT_FAST64_MIN   INT64_MIN
-#define INT_FAST64_MAX   INT64_MAX
-#define UINT_FAST8_MAX   UINT8_MAX
-#define UINT_FAST16_MAX  UINT16_MAX
-#define UINT_FAST32_MAX  UINT32_MAX
-#define UINT_FAST64_MAX  UINT64_MAX
-
-// 7.18.2.4 Limits of integer types capable of holding object pointers
-#ifdef _WIN64 // [
-#  define INTPTR_MIN   INT64_MIN
-#  define INTPTR_MAX   INT64_MAX
-#  define UINTPTR_MAX  UINT64_MAX
-#else // _WIN64 ][
-#  define INTPTR_MIN   INT32_MIN
-#  define INTPTR_MAX   INT32_MAX
-#  define UINTPTR_MAX  UINT32_MAX
-#endif // _WIN64 ]
-
-// 7.18.2.5 Limits of greatest-width integer types
-#define INTMAX_MIN   INT64_MIN
-#define INTMAX_MAX   INT64_MAX
-#define UINTMAX_MAX  UINT64_MAX
-
-// 7.18.3 Limits of other integer types
-
-#ifdef _WIN64 // [
-#  define PTRDIFF_MIN  _I64_MIN
-#  define PTRDIFF_MAX  _I64_MAX
-#else  // _WIN64 ][
-#  define PTRDIFF_MIN  _I32_MIN
-#  define PTRDIFF_MAX  _I32_MAX
-#endif  // _WIN64 ]
-
-#define SIG_ATOMIC_MIN  INT_MIN
-#define SIG_ATOMIC_MAX  INT_MAX
-
-#ifndef SIZE_MAX // [
-#  ifdef _WIN64 // [
-#     define SIZE_MAX  _UI64_MAX
-#  else // _WIN64 ][
-#     define SIZE_MAX  _UI32_MAX
-#  endif // _WIN64 ]
-#endif // SIZE_MAX ]
-
-// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h>
-#ifndef WCHAR_MIN // [
-#  define WCHAR_MIN  0
-#endif  // WCHAR_MIN ]
-#ifndef WCHAR_MAX // [
-#  define WCHAR_MAX  _UI16_MAX
-#endif  // WCHAR_MAX ]
-
-#define WINT_MIN  0
-#define WINT_MAX  _UI16_MAX
-
-#endif // __STDC_LIMIT_MACROS ]
-
-
-// 7.18.4 Limits of other integer types
-
-#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [   See 
footnote 224 at page 260
-
-// 7.18.4.1 Macros for minimum-width integer constants
-
-#define INT8_C(val)  val##i8
-#define INT16_C(val) val##i16
-#define INT32_C(val) val##i32
-#define INT64_C(val) val##i64
-
-#define UINT8_C(val)  val##ui8
-#define UINT16_C(val) val##ui16
-#define UINT32_C(val) val##ui32
-#define UINT64_C(val) val##ui64
-
-// 7.18.4.2 Macros for greatest-width integer constants
-#ifndef INTMAX_C
-#define INTMAX_C   INT64_C
-#endif
-#ifndef UINTMAX_C
-#define UINTMAX_C  UINT64_C
-#endif
-
-#endif // __STDC_CONSTANT_MACROS ]
-
-
-#endif // _MSC_STDINT_H_ ]
diff --git a/volk/gen/archs.xml b/volk/gen/archs.xml
deleted file mode 100644
index e570fe5..0000000
--- a/volk/gen/archs.xml
+++ /dev/null
@@ -1,204 +0,0 @@
-<!-- archs appear in order of significance for blind, de-facto version 
ordering -->
-<grammar>
-
-<arch name="generic"> <!-- name is required-->
-</arch>
-
-<arch name="altivec">
-  <flag compiler="gnu">-maltivec</flag>
-  <alignment>16</alignment>
-  <check name="has_ppc"></check>
-</arch>
-
-<arch name="softfp">
-  <flag compiler="gnu">-mfloat-abi=softfp</flag>
-</arch>
-
-<arch name="hardfp">
-  <flag compiler="gnu">-mfloat-abi=hard</flag>
-</arch>
-
-<arch name="neon">
-  <flag compiler="gnu">-mfpu=neon</flag>
-  <flag compiler="gnu">-funsafe-math-optimizations</flag>
-  <alignment>16</alignment>
-  <check name="has_neon"></check>
-</arch>
-
-<arch name="32">
-  <flag compiler="gnu">-m32</flag>
-</arch>
-
-<arch name="64">
-  <check name="check_extended_cpuid">
-      <param>0x80000001</param>
-  </check>
-  <check name="cpuid_x86_bit">  <!-- checks to see if a bit is set -->
-      <param>3</param>          <!-- eax, ebx, ecx, [edx] -->
-      <param>0x80000001</param> <!-- cpuid operation -->
-      <param>29</param>         <!-- bit shift -->
-  </check>
-  <flag compiler="gnu">-m64</flag>
-  <flag compiler="clang">-m64</flag>
-</arch>
-
-<arch name="3dnow">
-  <check name="cpuid_x86_bit">
-      <param>3</param>
-      <param>0x80000001</param>
-      <param>31</param>
-  </check>
-  <flag compiler="gnu">-m3dnow</flag>
-  <flag compiler="clang">-m3dnow</flag>
-  <alignment>8</alignment>
-</arch>
-
-<arch name="abm">
-  <check name="cpuid_x86_bit">
-      <param>3</param>
-      <param>0x80000001</param>
-      <param>5</param>
-  </check>
-  <flag compiler="gnu">-msse4.2</flag>
-  <flag compiler="clang">-msse4.2</flag>
-  <alignment>16</alignment>
-</arch>
-
-<arch name="popcount">
-  <check name="cpuid_x86_bit">
-      <param>2</param>
-      <param>0x00000001</param>
-      <param>23</param>
-  </check>
-  <flag compiler="gnu">-mpopcnt</flag>
-  <flag compiler="clang">-mpopcnt</flag>
-  <flag compiler="msvc">/arch:AVX</flag>
-</arch>
-
-<arch name="mmx">
-  <check name="cpuid_x86_bit">
-      <param>3</param>
-      <param>0x00000001</param>
-      <param>23</param>
-  </check>
-  <flag compiler="gnu">-mmmx</flag>
-  <flag compiler="clang">-mmmx</flag>
-  <flag compiler="msvc">/arch:SSE</flag>
-  <alignment>8</alignment>
-</arch>
-
-<arch name="sse">
-  <check name="cpuid_x86_bit">
-      <param>3</param>
-      <param>0x00000001</param>
-      <param>25</param>
-  </check>
-  <flag compiler="gnu">-msse</flag>
-  <flag compiler="clang">-msse</flag>
-  <flag compiler="msvc">/arch:SSE</flag>
-  <environment>_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);</environment>
-  <include>xmmintrin.h</include>
-  <alignment>16</alignment>
-</arch>
-
-<arch name="sse2">
-  <check name="cpuid_x86_bit">
-      <param>3</param>
-      <param>0x00000001</param>
-      <param>26</param>
-  </check>
-  <flag compiler="gnu">-msse2</flag>
-  <flag compiler="clang">-msse2</flag>
-  <flag compiler="msvc">/arch:SSE2</flag>
-  <alignment>16</alignment>
-</arch>
-
-<arch name="orc">
-</arch>
-
-<!-- it's here for overrule stuff. -->
-<arch name="norc">
-</arch>
-
-<arch name="sse3">
-  <check name="cpuid_x86_bit">
-      <param>2</param>
-      <param>0x00000001</param>
-      <param>0</param>
-  </check>
-  <flag compiler="gnu">-msse3</flag>
-  <flag compiler="clang">-msse3</flag>
-  <flag compiler="msvc">/arch:AVX</flag>
-  
<environment>_MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON);</environment>
-  <include>pmmintrin.h</include>
-  <alignment>16</alignment>
-</arch>
-
-<arch name="ssse3">
-  <check name="cpuid_x86_bit">
-      <param>2</param>
-      <param>0x00000001</param>
-      <param>9</param>
-  </check>
-  <flag compiler="gnu">-mssse3</flag>
-  <flag compiler="clang">-mssse3</flag>
-  <flag compiler="msvc">/arch:AVX</flag>
-  <alignment>16</alignment>
-</arch>
-
-<arch name="sse4_a">
-  <check name="cpuid_x86_bit">
-      <param>2</param>
-      <param>0x80000001</param>
-      <param>6</param>
-  </check>
-  <flag compiler="gnu">-msse4a</flag>
-  <flag compiler="clang">-msse4a</flag>
-  <alignment>16</alignment>
-</arch>
-
-<arch name="sse4_1">
-  <check name="cpuid_x86_bit">
-      <param>2</param>
-      <param>0x00000001</param>
-      <param>19</param>
-  </check>
-  <flag compiler="gnu">-msse4.1</flag>
-  <flag compiler="clang">-msse4.1</flag>
-  <flag compiler="msvc">/arch:AVX</flag>
-  <alignment>16</alignment>
-</arch>
-
-<arch name="sse4_2">
-  <check name="cpuid_x86_bit">
-      <param>2</param>
-      <param>0x00000001</param>
-      <param>20</param>
-  </check>
-  <flag compiler="gnu">-msse4.2</flag>
-  <flag compiler="clang">-msse4.2</flag>
-  <flag compiler="msvc">/arch:AVX</flag>
-  <alignment>16</alignment>
-</arch>
-
-<arch name="avx">
-  <check name="cpuid_x86_bit">
-      <param>2</param>
-      <param>0x00000001</param>
-      <param>28</param>
-  </check>
-  <!-- check to make sure that xgetbv is enabled in OS -->
-  <check name="cpuid_x86_bit">
-      <param>2</param>
-      <param>0x00000001</param>
-      <param>27</param>
-  </check>
-  <!-- check to see that the OS has enabled AVX -->
-  <check name="get_avx_enabled"></check>
-  <flag compiler="gnu">-mavx</flag>
-  <flag compiler="clang">-mavx</flag>
-  <flag compiler="msvc">/arch:AVX</flag>
-  <alignment>32</alignment>
-</arch>
-
-</grammar>
diff --git a/volk/gen/machines.xml b/volk/gen/machines.xml
deleted file mode 100644
index 357bf75..0000000
--- a/volk/gen/machines.xml
+++ /dev/null
@@ -1,55 +0,0 @@
-<grammar>
-
-<machine name="generic">
-<archs>generic orc|</archs>
-</machine>
-
-<!--
-<machine name="mmx">
-<archs>generic 32|64 mmx orc|</archs>
-</machine>
-
-<machine name="sse">
-<archs>generic 32|64| mmx| sse orc|</archs>
-</machine>
--->
-
-<machine name="neon">
-<archs>generic neon softfp|hardfp orc|</archs>
-</machine>
-
-<!-- trailing | bar means generate without either for MSVC -->
-<machine name="sse2">
-<archs>generic 32|64| mmx| sse sse2 orc|</archs>
-</machine>
-
-<machine name="sse3">
-<archs>generic 32|64 mmx sse sse2 sse3 orc|</archs>
-</machine>
-
-<machine name="ssse3">
-<archs>generic 32|64 mmx sse sse2 sse3 ssse3 orc|</archs>
-</machine>
-
-<machine name="sse4_a">
-<archs>generic 32|64 mmx sse sse2 sse3 sse4_a popcount orc|</archs>
-</machine>
-
-<machine name="sse4_1">
-<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 orc|</archs>
-</machine>
-
-<machine name="sse4_2">
-<archs>generic 32|64 mmx sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount 
orc|</archs>
-</machine>
-
-<!-- trailing | bar means generate without either for MSVC -->
-<machine name="avx">
-<archs>generic 32|64| mmx| sse sse2 sse3 ssse3 sse4_1 sse4_2 popcount avx 
orc|</archs>
-</machine>
-
-<machine name="altivec">
-<archs>generic altivec</archs>
-</machine>
-
-</grammar>
diff --git a/volk/gen/volk_arch_defs.py b/volk/gen/volk_arch_defs.py
deleted file mode 100644
index 3c75e13..0000000
--- a/volk/gen/volk_arch_defs.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#
-# Copyright 2012 Free Software Foundation, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-archs = list()
-arch_dict = dict()
-
-class arch_class:
-    def __init__(self, flags, checks, **kwargs):
-        for key, cast, failval in (
-            ('name', str, None),
-            ('environment', str, None),
-            ('include', str, None),
-            ('alignment', int, 1)
-        ):
-            try: setattr(self, key, cast(kwargs[key]))
-            except: setattr(self, key, failval)
-        self.checks = checks
-        assert(self.name)
-        self._flags = flags
-
-    def is_supported(self, compiler):
-        if not self._flags.keys(): return True
-        return compiler in self._flags.keys()
-
-    def get_flags(self, compiler):
-        try: return self._flags[compiler]
-        except KeyError: return list()
-
-    def __repr__(self): return self.name
-
-def register_arch(**kwargs):
-    arch = arch_class(**kwargs)
-    archs.append(arch)
-    arch_dict[arch.name] = arch
-
-########################################################################
-# register the arches
-########################################################################
-#TODO skip the XML and put it here
-from xml.dom import minidom
-import os
-gendir = os.path.dirname(__file__)
-archs_xml = minidom.parse(os.path.join(gendir, 
'archs.xml')).getElementsByTagName('arch')
-for arch_xml in archs_xml:
-    kwargs = dict()
-    for attr in arch_xml.attributes.keys():
-        kwargs[attr] = arch_xml.attributes[attr].value
-    for node in arch_xml.childNodes:
-        try:
-            name = node.tagName
-            val = arch_xml.getElementsByTagName(name)[0].firstChild.data
-            kwargs[name] = val
-        except: pass
-    checks = list()
-    for check_xml in arch_xml.getElementsByTagName("check"):
-        name = check_xml.attributes["name"].value
-        params = list()
-        for param_xml in check_xml.getElementsByTagName("param"):
-            params.append(param_xml.firstChild.data)
-        checks.append([name, params])
-    flags = dict()
-    for flag_xml in arch_xml.getElementsByTagName("flag"):
-        name = flag_xml.attributes["compiler"].value
-        if not flags.has_key(name): flags[name] = list()
-        flags[name].append(flag_xml.firstChild.data)
-    #force kwargs keys to be of type str, not unicode for py25
-    kwargs = dict((str(k), v) for k, v in kwargs.iteritems())
-    register_arch(flags=flags, checks=checks, **kwargs)
-
-if __name__ == '__main__':
-    print archs
diff --git a/volk/gen/volk_compile_utils.py b/volk/gen/volk_compile_utils.py
deleted file mode 100644
index cf13573..0000000
--- a/volk/gen/volk_compile_utils.py
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2012 Free Software Foundation, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-import optparse
-import volk_arch_defs
-import volk_machine_defs
-
-def do_arch_flags_list(compiler):
-    output = list()
-    for arch in volk_arch_defs.archs:
-        if not arch.is_supported(compiler): continue
-        fields = [arch.name] + arch.get_flags(compiler)
-        output.append(','.join(fields))
-    print ';'.join(output)
-
-def do_machines_list(arch_names):
-    output = list()
-    for machine in volk_machine_defs.machines:
-        machine_arch_set = set(machine.arch_names)
-        if set(arch_names).intersection(machine_arch_set) == machine_arch_set:
-            output.append(machine.name)
-    print ';'.join(output)
-
-def do_machine_flags_list(compiler, machine_name):
-    output = list()
-    machine = volk_machine_defs.machine_dict[machine_name]
-    for arch in machine.archs:
-        output.extend(arch.get_flags(compiler))
-    print ' '.join(output)
-
-def main():
-    parser = optparse.OptionParser()
-    parser.add_option('--mode', type='string')
-    parser.add_option('--compiler', type='string')
-    parser.add_option('--archs', type='string')
-    parser.add_option('--machine', type='string')
-    (opts, args) = parser.parse_args()
-
-    if opts.mode == 'arch_flags': return 
do_arch_flags_list(opts.compiler.lower())
-    if opts.mode == 'machines': return do_machines_list(opts.archs.split(';'))
-    if opts.mode == 'machine_flags': return 
do_machine_flags_list(opts.compiler.lower(), opts.machine)
-
-if __name__ == '__main__': main()
diff --git a/volk/gen/volk_kernel_defs.py b/volk/gen/volk_kernel_defs.py
deleted file mode 100644
index f246db0..0000000
--- a/volk/gen/volk_kernel_defs.py
+++ /dev/null
@@ -1,209 +0,0 @@
-#
-# Copyright 2011-2012 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-#
-
-import os
-import re
-import sys
-import glob
-
-########################################################################
-# Strip comments from a c/cpp file.
-# Input is code string, output is code string without comments.
-# 
http://stackoverflow.com/questions/241327/python-snippet-to-remove-c-and-c-comments
-########################################################################
-def comment_remover(text):
-    def replacer(match):
-        s = match.group(0)
-        if s.startswith('/'):
-            return ""
-        else:
-            return s
-    pattern = re.compile(
-        r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"',
-        re.DOTALL | re.MULTILINE
-    )
-    return re.sub(pattern, replacer, text)
-
-########################################################################
-# Split code into nested sections according to ifdef preprocessor macros
-########################################################################
-def split_into_nested_ifdef_sections(code):
-    sections = list()
-    section = ''
-    header = 'text'
-    in_section_depth = 0
-    for i, line in enumerate(code.splitlines()):
-        m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line)
-        line_is = 'normal'
-        if m:
-            p0, p1, fcn, stuff = m.groups()
-            if fcn in ('if', 'ifndef', 'ifdef'): line_is = 'if'
-            if fcn in ('else', 'elif'): line_is = 'else'
-            if fcn in ('endif',): line_is = 'end'
-
-        if line_is == 'if': in_section_depth += 1
-        if line_is == 'end': in_section_depth -= 1
-
-        if in_section_depth == 1 and line_is == 'if':
-            sections.append((header, section))
-            section = ''
-            header = line
-            continue
-
-        if in_section_depth == 1 and line_is == 'else':
-            sections.append((header, section))
-            section = ''
-            header = line
-            continue
-
-        if in_section_depth == 0 and line_is == 'end':
-            sections.append((header, section))
-            section = ''
-            header = 'text'
-            continue
-
-        section += line + '\n'
-
-    sections.append((header, section)) #and pack remainder into sections
-    sections = [sec for sec in sections if sec[1].strip()] #filter empty 
sections
-
-    #recurse into non-text sections to fill subsections
-    for i, (header, section) in enumerate(sections):
-        if header == 'text': continue
-        sections[i] = (header, split_into_nested_ifdef_sections(section))
-
-    return sections
-
-########################################################################
-# Recursive print of sections to test code above
-########################################################################
-def print_sections(sections, indent = '  '):
-    for header, body in sections:
-        if header == 'text':
-            print indent, ('\n'+indent).join(body.splitlines())
-            continue
-        print indent.replace(' ', '-') + '>', header
-        print_sections(body, indent + '  ')
-
-########################################################################
-# Flatten a section to just body text
-########################################################################
-def flatten_section_text(sections):
-    output = ''
-    for hdr, bdy in sections:
-        if hdr != 'text': output += flatten_section_text(bdy)
-        else: output += bdy
-    return output
-
-########################################################################
-# Extract kernel info from section, represent as an implementation
-########################################################################
-class impl_class:
-    def __init__(self, kern_name, header, body):
-        #extract LV_HAVE_*
-        self.deps = set(map(str.lower, re.findall('LV_HAVE_(\w+)', header)))
-        #extract function suffix and args
-        body = flatten_section_text(body)
-        try:
-            fcn_matcher = re.compile('^.*(%s\\w*)\\s*\\((.*)$'%kern_name, 
re.DOTALL | re.MULTILINE)
-            body = body.split('{')[0].rsplit(')', 1)[0] #get the part before 
the open ){ bracket
-            m = fcn_matcher.match(body)
-            impl_name, the_rest = m.groups()
-            self.name = impl_name.replace(kern_name+'_', '')
-            self.args = list()
-            fcn_args = the_rest.split(',')
-            for fcn_arg in fcn_args:
-                arg_matcher = re.compile('^\s*(.*\\W)\s*(\w+)\s*$', re.DOTALL 
| re.MULTILINE)
-                m = arg_matcher.match(fcn_arg)
-                arg_type, arg_name = m.groups()
-                self.args.append((arg_type, arg_name))
-        except Exception as ex:
-            raise Exception, 'I cant parse the function prototype from: %s in 
%s\n%s'%(kern_name, body, ex)
-
-        assert self.name
-        self.is_aligned = self.name.startswith('a_')
-
-    def __repr__(self):
-        return self.name
-
-########################################################################
-# Get sets of LV_HAVE_* from the code
-########################################################################
-def extract_lv_haves(code):
-    haves = list()
-    for line in code.splitlines():
-        if not line.strip().startswith('#'): continue
-        have_set = set(map(str.lower, re.findall('LV_HAVE_(\w+)', line)))
-        if have_set: haves.append(have_set)
-    return haves
-
-########################################################################
-# Represent a processing kernel, parse from file
-########################################################################
-class kernel_class:
-    def __init__(self, kernel_file):
-        self.name = os.path.splitext(os.path.basename(kernel_file))[0]
-        self.pname = self.name.replace('volk_', 'p_')
-        code = open(kernel_file, 'r').read()
-        code = comment_remover(code)
-        sections = split_into_nested_ifdef_sections(code)
-        self._impls = list()
-        for header, section in sections:
-            if 'ifndef' not in header.lower(): continue
-            for sub_hdr, body in section:
-                if 'if' not in sub_hdr.lower(): continue
-                if 'LV_HAVE_' not in sub_hdr: continue
-                self._impls.append(impl_class(
-                    kern_name=self.name, header=sub_hdr, body=body,
-                ))
-        assert(self._impls)
-        self.has_dispatcher = False
-        for impl in self._impls:
-            if impl.name == 'dispatcher':
-                self._impls.remove(impl)
-                self.has_dispatcher = True
-                break
-        self.args = self._impls[0].args
-        self.arglist_types = ', '.join([a[0] for a in self.args])
-        self.arglist_full = ', '.join(['%s %s'%a for a in self.args])
-        self.arglist_names = ', '.join([a[1] for a in self.args])
-
-    def get_impls(self, archs):
-        archs = set(archs)
-        impls = list()
-        for impl in self._impls:
-            if impl.deps.intersection(archs) == impl.deps:
-                impls.append(impl)
-        return impls
-
-    def __repr__(self):
-        return self.name
-
-########################################################################
-# Extract information from the VOLK kernels
-########################################################################
-__file__ = os.path.abspath(__file__)
-srcdir = os.path.dirname(os.path.dirname(__file__))
-kernel_files = glob.glob(os.path.join(srcdir, "kernels", "volk", "*.h"))
-kernels = map(kernel_class, kernel_files)
-
-if __name__ == '__main__':
-    print kernels
diff --git a/volk/gen/volk_machine_defs.py b/volk/gen/volk_machine_defs.py
deleted file mode 100644
index 7293d47..0000000
--- a/volk/gen/volk_machine_defs.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#
-# Copyright 2012 Free Software Foundation, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-from volk_arch_defs import arch_dict
-
-machines = list()
-machine_dict = dict()
-
-class machine_class:
-    def __init__(self, name, archs):
-        self.name = name
-        self.archs = list()
-        self.arch_names = list()
-        for arch_name in archs:
-            if not arch_name: continue
-            arch = arch_dict[arch_name]
-            self.archs.append(arch)
-            self.arch_names.append(arch_name)
-        self.alignment = max(map(lambda a: a.alignment, self.archs))
-
-    def __repr__(self): return self.name
-
-def register_machine(name, archs):
-    for i, arch_name in enumerate(archs):
-        if '|' in arch_name: #handle special arch names with the '|'
-            for arch_sub in arch_name.split('|'):
-                if arch_sub:
-                    register_machine(name+'_'+arch_sub, archs[:i] + [arch_sub] 
+ archs[i+1:])
-                else:
-                    register_machine(name, archs[:i] + archs[i+1:])
-            return
-    machine = machine_class(name=name, archs=archs)
-    machines.append(machine)
-    machine_dict[machine.name] = machine
-
-########################################################################
-# register the machines
-########################################################################
-#TODO skip the XML and put it here
-from xml.dom import minidom
-import os
-gendir = os.path.dirname(__file__)
-machines_xml = minidom.parse(os.path.join(gendir, 
'machines.xml')).getElementsByTagName('machine')
-for machine_xml in machines_xml:
-    kwargs = dict()
-    for attr in machine_xml.attributes.keys():
-        kwargs[attr] = machine_xml.attributes[attr].value
-    for node in machine_xml.childNodes:
-        try:
-            name = node.tagName
-            val = machine_xml.getElementsByTagName(name)[0].firstChild.data
-            kwargs[name] = val
-        except: pass
-    kwargs['archs'] = kwargs['archs'].split()
-    #force kwargs keys to be of type str, not unicode for py25
-    kwargs = dict((str(k), v) for k, v in kwargs.iteritems())
-    register_machine(**kwargs)
-
-if __name__ == '__main__':
-    print machines
diff --git a/volk/gen/volk_tmpl_utils.py b/volk/gen/volk_tmpl_utils.py
deleted file mode 100644
index 6c08a82..0000000
--- a/volk/gen/volk_tmpl_utils.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2012 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-#
-
-import os
-import re
-import sys
-import optparse
-import volk_arch_defs
-import volk_machine_defs
-import volk_kernel_defs
-from Cheetah import Template
-
-def __escape_pre_processor(code):
-    out = list()
-    for line in code.splitlines():
-        m = re.match('^(\s*)#(\s*)(\w+)(.*)$', line)
-        if m:
-            p0, p1, fcn, stuff = m.groups()
-            conly = fcn in ('include', 'define', 'ifdef', 'ifndef', 'endif', 
'elif', 'pragma')
-            both = fcn in ('if', 'else')
-            istmpl = '$' in stuff
-            if 'defined' in stuff: istmpl = False
-            if conly or (both and not istmpl):
-                line = '%s\\#%s%s%s'%(p0, p1, fcn, stuff)
-        out.append(line)
-    return '\n'.join(out)
-
-def __parse_tmpl(_tmpl, **kwargs):
-    defs = {
-        'archs': volk_arch_defs.archs,
-        'arch_dict': volk_arch_defs.arch_dict,
-        'machines': volk_machine_defs.machines,
-        'machine_dict': volk_machine_defs.machine_dict,
-        'kernels': volk_kernel_defs.kernels,
-    }
-    defs.update(kwargs)
-    _tmpl = __escape_pre_processor(_tmpl)
-    _tmpl = """
-
-/* this file was generated by volk template utils, do not edit! */
-
-""" + _tmpl
-    return str(Template.Template(_tmpl, defs))
-
-def main():
-    parser = optparse.OptionParser()
-    parser.add_option('--input', type='string')
-    parser.add_option('--output', type='string')
-    (opts, args) = parser.parse_args()
-
-    output = __parse_tmpl(open(opts.input).read(), args=args)
-    if opts.output: open(opts.output, 'w').write(output)
-    else: print output
-
-if __name__ == '__main__': main()
diff --git a/volk/include/volk/constants.h b/volk/include/volk/constants.h
deleted file mode 100644
index bac0e36..0000000
--- a/volk/include/volk/constants.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2006,2009,2013 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_CONSTANTS_H
-#define INCLUDED_VOLK_CONSTANTS_H
-
-#include <volk/volk_common.h>
-
-__VOLK_DECL_BEGIN
-
-VOLK_API char* volk_prefix();
-VOLK_API char* volk_build_date();
-VOLK_API char* volk_version();
-VOLK_API char* volk_c_compiler();
-VOLK_API char* volk_compiler_flags();
-VOLK_API char* volk_available_machines();
-
-__VOLK_DECL_END
-
-#endif /* INCLUDED_VOLK_CONSTANTS_H */
diff --git a/volk/include/volk/volk_common.h b/volk/include/volk/volk_common.h
deleted file mode 100644
index 38263d5..0000000
--- a/volk/include/volk/volk_common.h
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef INCLUDED_LIBVOLK_COMMON_H
-#define INCLUDED_LIBVOLK_COMMON_H
-
-////////////////////////////////////////////////////////////////////////
-// Cross-platform attribute macros
-////////////////////////////////////////////////////////////////////////
-#if defined __GNUC__
-#  define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
-#  define __VOLK_ATTR_UNUSED     __attribute__((unused))
-#  define __VOLK_ATTR_INLINE     __attribute__((always_inline))
-#  define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
-#  if __GNUC__ >= 4
-#    define __VOLK_ATTR_EXPORT   __attribute__((visibility("default")))
-#    define __VOLK_ATTR_IMPORT   __attribute__((visibility("default")))
-#  else
-#    define __VOLK_ATTR_EXPORT
-#    define __VOLK_ATTR_IMPORT
-#  endif
-#elif _MSC_VER
-#  define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
-#  define __VOLK_ATTR_UNUSED
-#  define __VOLK_ATTR_INLINE     __forceinline
-#  define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
-#  define __VOLK_ATTR_EXPORT     __declspec(dllexport)
-#  define __VOLK_ATTR_IMPORT     __declspec(dllimport)
-#else
-#  define __VOLK_ATTR_ALIGNED(x)
-#  define __VOLK_ATTR_UNUSED
-#  define __VOLK_ATTR_INLINE
-#  define __VOLK_ATTR_DEPRECATED
-#  define __VOLK_ATTR_EXPORT
-#  define __VOLK_ATTR_IMPORT
-#endif
-
-////////////////////////////////////////////////////////////////////////
-// Ignore annoying warnings in MSVC
-////////////////////////////////////////////////////////////////////////
-#if defined(_MSC_VER)
-#  pragma warning(disable: 4244) //'conversion' conversion from 'type1' to 
'type2', possible loss of data
-#  pragma warning(disable: 4305) //'identifier' : truncation from 'type1' to 
'type2'
-#endif
-
-////////////////////////////////////////////////////////////////////////
-// C-linkage declaration macros
-// FIXME: due to the usage of complex.h, require gcc for c-linkage
-////////////////////////////////////////////////////////////////////////
-#if defined(__cplusplus) && (__GNUC__)
-#  define __VOLK_DECL_BEGIN extern "C" {
-#  define __VOLK_DECL_END }
-#else
-#  define __VOLK_DECL_BEGIN
-#  define __VOLK_DECL_END
-#endif
-
-////////////////////////////////////////////////////////////////////////
-// Define VOLK_API for library symbols
-// http://gcc.gnu.org/wiki/Visibility
-////////////////////////////////////////////////////////////////////////
-#ifdef volk_EXPORTS
-#  define VOLK_API __VOLK_ATTR_EXPORT
-#else
-#  define VOLK_API __VOLK_ATTR_IMPORT
-#endif
-
-////////////////////////////////////////////////////////////////////////
-// The bit128 union used by some
-////////////////////////////////////////////////////////////////////////
-#include <inttypes.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-#endif
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-#endif
-
-union bit128{
-  uint16_t i16[8];
-  uint32_t i[4];
-  float f[4];
-  double d[2];
-
-  #ifdef LV_HAVE_SSE
-  __m128 float_vec;
-  #endif
-
-  #ifdef LV_HAVE_SSE2
-  __m128i int_vec;
-  __m128d double_vec;
-  #endif
-};
-
-#define bit128_p(x) ((union bit128 *)(x))
-
-#endif /*INCLUDED_LIBVOLK_COMMON_H*/
diff --git a/volk/include/volk/volk_complex.h b/volk/include/volk/volk_complex.h
deleted file mode 100644
index 5bd9250..0000000
--- a/volk/include/volk/volk_complex.h
+++ /dev/null
@@ -1,86 +0,0 @@
-#ifndef INCLUDE_VOLK_COMPLEX_H
-#define INCLUDE_VOLK_COMPLEX_H
-
-/*!
- * \brief Provide typedefs and operators for all complex types in C and C++.
- *
- * The typedefs encompass all signed integer and floating point types.
- * Each operator function is intended to work across all data types.
- * Under C++, these operators are defined as inline templates.
- * Under C, these operators are defined as preprocessor macros.
- * The use of macros makes the operators agnostic to the type.
- *
- * The following operator functions are defined:
- * - lv_cmake - make a complex type from components
- * - lv_creal - get the real part of the complex number
- * - lv_cimag - get the imaginary part of the complex number
- * - lv_conj - take the conjugate of the complex number
- */
-
-#ifdef __cplusplus
-
-#include <complex>
-#include <stdint.h>
-
-typedef std::complex<int8_t>  lv_8sc_t;
-typedef std::complex<int16_t> lv_16sc_t;
-typedef std::complex<int32_t> lv_32sc_t;
-typedef std::complex<int64_t> lv_64sc_t;
-typedef std::complex<float>   lv_32fc_t;
-typedef std::complex<double>  lv_64fc_t;
-
-template <typename T> inline std::complex<T> lv_cmake(const T &r, const T &i){
-    return std::complex<T>(r, i);
-}
-
-template <typename T> inline typename T::value_type lv_creal(const T &x){
-    return x.real();
-}
-
-template <typename T> inline typename T::value_type lv_cimag(const T &x){
-    return x.imag();
-}
-
-template <typename T> inline T lv_conj(const T &x){
-    return std::conj(x);
-}
-
-#else /* __cplusplus */
-
-#include <complex.h>
-
-typedef char complex         lv_8sc_t;
-typedef short complex        lv_16sc_t;
-typedef long complex         lv_32sc_t;
-typedef long long complex    lv_64sc_t;
-typedef float complex        lv_32fc_t;
-typedef double complex       lv_64fc_t;
-
-#define lv_cmake(r, i) ((r) + _Complex_I*(i))
-
-// When GNUC is available, use the complex extensions.
-// The extensions always return the correct value type.
-// http://gcc.gnu.org/onlinedocs/gcc/Complex.html
-#ifdef __GNUC__
-
-#define lv_creal(x) (__real__(x))
-
-#define lv_cimag(x) (__imag__(x))
-
-#define lv_conj(x) (~(x))
-
-// When not available, use the c99 complex function family,
-// which always returns double regardless of the input type.
-#else /* __GNUC__ */
-
-#define lv_creal(x) (creal(x))
-
-#define lv_cimag(x) (cimag(x))
-
-#define lv_conj(x) (conj(x))
-
-#endif /* __GNUC__ */
-
-#endif /* __cplusplus */
-
-#endif /* INCLUDE_VOLK_COMPLEX_H */
diff --git a/volk/include/volk/volk_malloc.h b/volk/include/volk/volk_malloc.h
deleted file mode 100644
index 6ec7391..0000000
--- a/volk/include/volk/volk_malloc.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/* -*- c -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_MALLOC_H
-#define INCLUDED_VOLK_MALLOC_H
-
-#include <volk/volk_common.h>
-#include <stdlib.h>
-
-__VOLK_DECL_BEGIN
-
-/*!
- * \brief Allocate \p size bytes of data aligned to \p alignment.
- *
- * \details
- * Because we don't have a standard method to allocate buffers in
- * memory that are guaranteed to be on an alignment, VOLK handles this
- * itself. The volk_malloc function behaves like malloc in that it
- * returns a pointer to the allocated memory. However, it also takes
- * in an alignment specfication, which is usually something like 16 or
- * 32 to ensure that the aligned memory is located on a particular
- * byte boundary for use with SIMD.
- *
- * Internally, the volk_malloc first checks if the compiler is C11
- * compliant and uses the new aligned_alloc method. If not, it checks
- * if the system is POSIX compliant and uses posix_memalign. If that
- * fails, volk_malloc handles the memory allocation and alignment
- * internally.
- *
- * Because of the ways in which volk_malloc may allocate memory, it is
- * important to always free volk_malloc pointers using volk_free.
- *
- * \param size The number of bytes to allocate.
- * \param alignment The byte alignment of the allocated memory.
- * \return pointer to aligned memory.
- */
-VOLK_API void *volk_malloc(size_t size, size_t alignment);
-
-/*!
- * \brief Free's memory allocated by volk_malloc.
- * \param aptr The aligned pointer allocaed by volk_malloc.
- */
-VOLK_API void volk_free(void *aptr);
-
-__VOLK_DECL_END
-
-#endif /* INCLUDED_VOLK_MALLOC_H */
diff --git a/volk/include/volk/volk_prefs.h b/volk/include/volk/volk_prefs.h
deleted file mode 100644
index 690e5f9..0000000
--- a/volk/include/volk/volk_prefs.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef INCLUDED_VOLK_PREFS_H
-#define INCLUDED_VOLK_PREFS_H
-
-#include <volk/volk_common.h>
-#include <stdlib.h>
-
-__VOLK_DECL_BEGIN
-
-typedef struct volk_arch_pref
-{
-    char name[128];   //name of the kernel
-    char impl_a[128]; //best aligned impl
-    char impl_u[128]; //best unaligned impl
-} volk_arch_pref_t;
-
-////////////////////////////////////////////////////////////////////////
-// get path to volk_config profiling info
-////////////////////////////////////////////////////////////////////////
-VOLK_API void volk_get_config_path(char *);
-
-////////////////////////////////////////////////////////////////////////
-// load prefs into global prefs struct
-////////////////////////////////////////////////////////////////////////
-VOLK_API size_t volk_load_preferences(volk_arch_pref_t **);
-
-__VOLK_DECL_END
-
-#endif //INCLUDED_VOLK_PREFS_H
diff --git a/volk/kernels/README.txt b/volk/kernels/README.txt
deleted file mode 100644
index 5dd7434..0000000
--- a/volk/kernels/README.txt
+++ /dev/null
@@ -1,67 +0,0 @@
-########################################################################
-# How to create custom kernel dispatchers
-########################################################################
-A kernel dispatcher is kernel implementation that calls other kernel 
implementations.
-By default, a dispatcher is generated by the build system for every kernel 
such that:
-  * the best aligned implemention is called when all pointer arguments are 
aligned,
-  * and otherwise the best unaligned implementation is called.
-
-The author of a VOLK kernel may create a custom dispatcher,
-to be called in place of the automatically generated one.
-A custom dispatcher may be useful to handle head and tail cases,
-or to implement different alignment and bounds checking logic.
-
-########################################################################
-# Code for an example dispatcher w/ tail case
-########################################################################
-#include <volk/volk_common.h>
-
-#ifdef LV_HAVE_DISPATCHER
-
-static inline void volk_32f_x2_add_32f_dispatcher(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points)
-{
-    const unsigned int num_points_r = num_points%4;
-    const unsigned int num_points_x = num_points - num_points_r;
-
-    if (volk_is_aligned(VOLK_OR_PTR(cVector, VOLK_OR_PTR(aVector, bVector))))
-    {
-        volk_32f_x2_add_32f_a(cVector, aVector, bVector, num_points_x);
-    }
-    else
-    {
-        volk_32f_x2_add_32f_u(cVector, aVector, bVector, num_points_x);
-    }
-
-    volk_32f_x2_add_32f_g(cVector+num_points_x, aVector+num_points_x, 
bVector+num_points_x, num_points_r);
-}
-
-#endif //LV_HAVE_DISPATCHER
-
-########################################################################
-# Code for an example dispatcher w/ tail case and accumulator
-########################################################################
-#include <volk/volk_common.h>
-
-#ifdef LV_HAVE_DISPATCHER
-
-static inline void volk_32f_x2_dot_prod_32f_dispatcher(float * result, const 
float * input, const float * taps, unsigned int num_points)
-{
-    const unsigned int num_points_r = num_points%16;
-    const unsigned int num_points_x = num_points - num_points_r;
-
-    if (volk_is_aligned(VOLK_OR_PTR(input, taps)))
-    {
-        volk_32f_x2_dot_prod_32f_a(result, input, taps, num_points_x);
-    }
-    else
-    {
-        volk_32f_x2_dot_prod_32f_u(result, input, taps, num_points_x);
-    }
-
-    float result_tail = 0;
-    volk_32f_x2_dot_prod_32f_g(&result_tail, input+num_points_x, 
taps+num_points_x, num_points_r);
-
-    *result += result_tail;
-}
-
-#endif //LV_HAVE_DISPATCHER
diff --git a/volk/kernels/volk/asm/neon/volk_16i_max_star_horizontal_16i.s 
b/volk/kernels/volk/asm/neon/volk_16i_max_star_horizontal_16i.s
deleted file mode 100644
index 2099355..0000000
--- a/volk/kernels/volk/asm/neon/volk_16i_max_star_horizontal_16i.s
+++ /dev/null
@@ -1,52 +0,0 @@
-@ static inline void volk_16i_max_star_horizontal_16i_neonasm(float* cVector, 
const float* aVector, const float* bVector, unsigned int num_points);
-       .global volk_16i_max_star_horizontal_16i_neonasm
-volk_16i_max_star_horizontal_16i_neonasm:
-       @ r0 - cVector: pointer to output array
-       @ r1 - aVector: pointer to input array 1
-       @ r2 - num_points: number of items to process
-
-volk_16i_max_star_horizontal_16i_neonasm:
-    pld     [r1:128]
-    push    {r4, r5, r6}    @ preserve register states
-    lsrs    r5, r2, #4      @ 1/16th points = num_points/16
-    vmov.i32    q12, #0     @ q12 = [0,0,0,0]
-    beq .smallvector        @ less than 16 elements in vector
-    mov r4, r1              @ r4 = aVector
-    mov r12, r0             @ gcc calls this ip
-    mov r3, #0              @ number = 0
-
-.loop1:
-    vld2.16 {d16-d19}, [r4]! @ aVector, interleaved load
-    pld [r4:128]
-    add r3, r3, #1           @ number += 1
-    cmp r3, r5               @ number < 1/16th points
-    vsub.i16    q10, q8, q9  @ subtraction
-    vcge.s16    q11, q10, #0 @ result > 0?
-    vcgt.s16    q10, q12, q10 @ result < 0?
-    vand.i16    q11, q8, q11 @ multiply by comparisons
-    vand.i16    q10, q9, q10 @ multiply by other comparison
-    vadd.i16    q10, q11, q10 @ add results to get max
-    vst1.16 {d20-d21}, [r12]! @ store the results
-    bne .loop1               @ at least 16 items left
-    add r1, r1, r3, lsl #5   
-    add r0, r0, r3, lsl #4
-.smallvector:
-    ands    r2, r2, #15
-    beq .end
-    mov r3, #0
-.loop3:
-    ldrh    r4, [r1]
-    bic r5, r3, #1
-    ldrh    ip, [r1, #2]
-    add r3, r3, #2
-    add r1, r1, #4
-    rsb r6, ip, r4
-    sxth    r6, r6
-    cmp r6, #0
-    movgt   ip, r4
-    cmp r3, r2
-    strh    ip, [r0, r5]
-    bcc .loop3
-.end:
-    pop {r4, r5, r6}
-    bx  lr
diff --git a/volk/kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_neonasm.s 
b/volk/kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_neonasm.s
deleted file mode 100644
index 235d375..0000000
--- a/volk/kernels/volk/asm/neon/volk_32f_s32f_multiply_32f_neonasm.s
+++ /dev/null
@@ -1,52 +0,0 @@
-@ static inline void volk_32f_s32f_multiply_32f_neonasm(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points);
-       .global volk_32f_s32f_multiply_32f_neonasm
-volk_32f_s32f_multiply_32f_neonasm:
-       @ r0 - cVector: pointer to output array
-       @ r1 - aVector: pointer to input array 1
-       @ r2 - bVector: pointer to input array 2
-       @ r3 - num_points: number of items to process
-
-       stmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12}    @ prologue - 
save register states
-
-
-    @ quarter_points = num_points / 4
-       movs r11, r3, lsr #2
-       beq .loop2 @ if zero into quarterPoints
-
-    @ number = quarter_points
-       mov r10, r3
-    @ copy address of input vector
-    mov r4, r1
-    @ copy address of output vector
-    mov r5, r0
-
-    @ load the scalar to a quad register
-    @ vmov.32 d2[0], r2
-    @ The scalar might be in s0, not totally sure
-    vdup.32 q2, d0[0]
-
-    @ this is giving fits. Current theory is hf has something to do with it
-    .loop1:
-    @  vld1.32 {q1}, [r4:128]! @ aVal
-    @  vmul.f32 q3, q1, q2
-    @  vst1.32 {q3}, [r5:128]! @ cVal
-    @
-    @  subs r10, r10, #1
-    @  bne     .loop1  @ first loop
-
-    @ number = quarter_points * 4
-    mov        r10, r11, asl #2
-
-    .loop2:
-    @   cmp    num_points, number
-    @   bls    .done
-    @
-    @   vld1.32 {d0[0]}, [aVector]!
-    @   vmul.f32 s2, s0, s4
-    @   vst1.32 {d1[0]}, [cVector]!
-    @   add number, number, #1
-    @   b .loop2
-
-.done:
-       ldmfd   sp!, {r4, r5, r6, r7, r8, r9, r10, r11, r12} @ epilogue - 
restore register states
-       bx      lr
diff --git a/volk/kernels/volk/asm/neon/volk_32f_x2_add_32f_a_neonasm.s 
b/volk/kernels/volk/asm/neon/volk_32f_x2_add_32f_a_neonasm.s
deleted file mode 100644
index 09e8638..0000000
--- a/volk/kernels/volk/asm/neon/volk_32f_x2_add_32f_a_neonasm.s
+++ /dev/null
@@ -1,54 +0,0 @@
-@ static inline void volk_32f_x2_add_32f_a_neonasm(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points);
-       .global volk_32f_x2_add_32f_a_neonasm
-volk_32f_x2_add_32f_a_neonasm:
-       @ r0 - cVector: pointer to output array
-       @ r1 - aVector: pointer to input array 1
-       @ r2 - bVector: pointer to input array 2
-       @ r3 - num_points: number of items to process
-       cVector .req r0
-       aVector .req r1
-       bVector .req r2
-       num_points .req r3
-       quarterPoints .req r7
-       number .req r8
-       aVal .req q0 @ d0-d1
-       bVal .req q1 @ d2-d3
-       cVal .req q2 @ d4-d5
-
-       @ AAPCS Section 5.1.1
-       @ A subroutine must preserve the contents of the registers r4-r8, r10, 
r11 and SP
-       stmfd   sp!, {r7, r8, sl}       @ prologue - save register states
-
-       movs quarterPoints, num_points, lsr #2
-       beq .loop2 @ if zero into quarterPoints
-
-       mov     number, #0      @ number, 0
-.loop1:
-       pld [aVector, #128] @ pre-load hint - this is implementation specific!
-       pld [bVector, #128] @ pre-load hint - this is implementation specific!
-
-       vld1.32 {d0-d1}, [aVector:128]! @ aVal
-       add     number, number, #1
-       vld1.32 {d2-d3}, [bVector:128]! @ bVal
-       vadd.f32 cVal, bVal, aVal
-       cmp     number, quarterPoints
-       vst1.32 {d4-d5}, [cVector:128]! @ cVal
-
-       ble     .loop1  @ first loop
-
-       mov     number, quarterPoints, asl #2
-
-.loop2:
-       cmp     num_points, number
-       bls     .done
-
-       vld1.32 {d0[0]}, [aVector]!
-       vld1.32 {d0[1]}, [bVector]!
-       vadd.f32 s2, s1, s0
-       vst1.32 {d1[0]}, [cVector]!
-       add number, number, #1
-       b .loop2
-
-.done:
-       ldmfd   sp!, {r7, r8, sl} @ epilogue - restore register states
-       bx      lr
diff --git a/volk/kernels/volk/asm/neon/volk_32f_x2_add_32f_a_neonpipeline.s 
b/volk/kernels/volk/asm/neon/volk_32f_x2_add_32f_a_neonpipeline.s
deleted file mode 100644
index 4c8af8b..0000000
--- a/volk/kernels/volk/asm/neon/volk_32f_x2_add_32f_a_neonpipeline.s
+++ /dev/null
@@ -1,68 +0,0 @@
-@ static inline void volk_32f_x2_add_32f_a_neonpipeline(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points);
-       .global volk_32f_x2_add_32f_a_neonpipeline
-volk_32f_x2_add_32f_a_neonpipeline:
-       @ r0 - cVector: pointer to output array
-       @ r1 - aVector: pointer to input array 1
-       @ r2 - bVector: pointer to input array 2
-       @ r3 - num_points: number of items to process
-       cVector .req r0
-       aVector .req r1
-       bVector .req r2
-       num_points .req r3
-       quarterPoints .req r7
-       number .req r8
-       aVal .req q0 @ d0-d1
-       bVal .req q1 @ d2-d3
-       cVal .req q2 @ d4-d5
-
-       stmfd   sp!, {r7, r8, sl}       @ prologue - save register states
-
-       pld [aVector, #128] @ pre-load hint - this is implementation specific!
-       pld [bVector, #128] @ pre-load hint - this is implementation specific!
-
-       movs quarterPoints, num_points, lsr #2
-       beq .loop2 @ if zero into quarterPoints
-
-       mov number, quarterPoints
-
-       @ Optimizing for pipeline
-       vld1.32 {d0-d1}, [aVector:128]! @ aVal
-       vld1.32 {d2-d3}, [bVector:128]! @ bVal
-       subs number, number, #1
-
-.loop1:
-       pld [aVector, #128] @ pre-load hint - this is implementation specific!
-       pld [bVector, #128] @ pre-load hint - this is implementation specific!
-       vadd.f32 cVal, bVal, aVal
-       vld1.32 {d0-d1}, [aVector:128]! @ aVal
-       vld1.32 {d2-d3}, [bVector:128]! @ bVal
-       vst1.32 {d4-d5}, [cVector:128]! @ cVal
-
-       subs number, number, #1
-       bne     .loop1  @ first loop
-
-       @ One more time
-       vadd.f32 cVal, bVal, aVal
-       vst1.32 {d4-d5}, [cVector:128]! @ cVal
-
-       mov     number, quarterPoints, asl #2
-
-.loop2:
-       cmp     num_points, number
-       bls     .done
-
-       vld1.32 {d0[0]}, [aVector]!
-       vld1.32 {d0[1]}, [bVector]!
-       vadd.f32 s2, s1, s0
-       vst1.32 {d1[0]}, [cVector]!
-       add number, number, #1
-       b .loop2
-
-.done:
-       ldmfd   sp!, {r7, r8, sl} @ epilogue - restore register states
-       bx      lr
-
-
-
-
-
diff --git a/volk/kernels/volk/asm/neon/volk_32f_x2_dot_prod_32f_neonasm.s 
b/volk/kernels/volk/asm/neon/volk_32f_x2_dot_prod_32f_neonasm.s
deleted file mode 100644
index 6457957..0000000
--- a/volk/kernels/volk/asm/neon/volk_32f_x2_dot_prod_32f_neonasm.s
+++ /dev/null
@@ -1,58 +0,0 @@
-@ static inline void volk_32f_x2_dot_prod_32f_neonasm(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points);
-       .global volk_32f_x2_dot_prod_32f_neonasm
-volk_32f_x2_dot_prod_32f_neonasm:
-       @ r0 - cVector: pointer to output array
-       @ r1 - aVector: pointer to input array 1
-       @ r2 - bVector: pointer to input array 2
-       @ r3 - num_points: number of items to process
-       cVector .req r0
-       aVector .req r1
-       bVector .req r2
-       num_points .req r3
-       quarterPoints .req r7
-       number .req r8
-       aVal .req q0 @ d0-d1
-       bVal .req q1 @ d2-d3
-       cVal .req q2 @ d4-d5
-
-       @ AAPCS Section 5.1.1
-       @ A subroutine must preserve the contents of the registers r4-r8, r10, 
r11 and SP
-       stmfd   sp!, {r7, r8, sl}       @ prologue - save register states
-
-    veor.32 q0, q0, q0
-       movs quarterPoints, num_points, lsr #2
-       beq .loop2 @ if zero into quarterPoints
-
-       mov     number, #0      @ number, 0
-.loop1:
-       pld [aVector, #128] @ pre-load hint - this is implementation specific!
-       pld [bVector, #128] @ pre-load hint - this is implementation specific!
-
-       vld1.32 {q1}, [aVector:128]!    @ aVal
-       vld1.32 {q2}, [bVector:128]!    @ bVal
-    vmla.f32 q0, q1, q2
-
-       add     number, number, #1
-       cmp     number, quarterPoints
-       ble     .loop1  @ first loop
-    
-    @ strange order comes from trying to schedule instructions
-    vadd.f32 s0, s0, s1
-    vadd.f32 s2, s2, s3
-       mov     number, quarterPoints, asl #2
-    vadd.f32 s0, s0, s2
-
-.loop2:
-       cmp     num_points, number
-       bls     .done
-
-       vld1.32 {d1[0]}, [aVector]!
-       vld1.32 {d1[1]}, [bVector]!
-       vmla.f32 s0, s2, s3
-       add number, number, #1
-       b .loop2
-
-.done:
-       vstr s0, [cVector]
-       ldmfd   sp!, {r7, r8, sl} @ epilogue - restore register states
-       bx      lr
diff --git a/volk/kernels/volk/asm/neon/volk_32f_x2_dot_prod_32f_neonasm_opts.s 
b/volk/kernels/volk/asm/neon/volk_32f_x2_dot_prod_32f_neonasm_opts.s
deleted file mode 100644
index 3093edc..0000000
--- a/volk/kernels/volk/asm/neon/volk_32f_x2_dot_prod_32f_neonasm_opts.s
+++ /dev/null
@@ -1,116 +0,0 @@
-@ static inline void volk_32f_x2_dot_prod_32f_neonasm_opts(float* cVector, 
const float* aVector, const float* bVector, unsigned int num_points);
-    @ r0 = cVector
-    @ r1 = aVector
-    @ r2 = bVector
-    @ r3 = num_points
-    .global    volk_32f_x2_dot_prod_32f_neonasm_opts
-volk_32f_x2_dot_prod_32f_neonasm_opts:
-     push    {r4, r5, r6, r7, r8, r9, r10, r11}
-    @ sixteenth_points = num_points / 16
-     lsrs       r8, r3, #4
-     sub        r13, r13, #16 @ subtracting 16 from stack pointer?, wat?
-    @ 0 out neon accumulators
-     veor       q0, q3, q3
-     veor       q1, q3, q3
-     veor       q2, q3, q3
-     veor       q3, q3, q3
-     beq        .smallvector @ if less than 16 points skip main loop
-     mov        r7, r2  @ copy input ptrs
-     mov        r6, r1  @ copy input ptrs
-     mov        r5, #0  @ loop counter
-.mainloop:
-     vld4.32    {d16,d18,d20,d22}, [r6]!
-     add        r5, r5, #1 @ inc loop counter
-     cmp        r5, r8     @ loop counter < sixteenth_points?
-     vld4.32    {d24,d26,d28,d30}, [r7]!
-     vld4.32    {d17,d19,d21,d23}, [r6]!
-     vld4.32    {d25,d27,d29,d31}, [r7]!
-     vmla.f32   q3, q8, q12
-     vmla.f32   q0, q13, q9
-     vmla.f32   q1, q14, q10
-     vmla.f32   q2, q15, q11
-     bne        .mainloop
-     lsl        r12, r8, #6 @ r12=r8/64
-     add        r1, r1, r12
-     add        r2, r2, r12
-.smallvector: @ actually this can be skipped for small vectors
-     vadd.f32   q3, q3, q0
-     lsl        r8, r8, #4 @ sixteenth_points * 16
-     cmp        r3, r8     @ num_points < sixteenth_points*16?
-     vadd.f32   q2, q1, q2
-     vadd.f32   q3, q2, q3 @ sum of 4 accumulators in to q3
-     vadd.f32   s15, s12, s15 @ q3 is s12-s15, so reduce to a single float
-     vadd.f32   s15, s15, s13
-     vadd.f32   s15, s15, s14
-     bls        .done      @ if vector is multiple of 16 then finish
-     sbfx       r11, r1, #2, #1 @ check alignment
-     rsb        r9, r8, r3
-     and        r11, r11, #3
-     mov        r6, r1
-     cmp        r11, r9
-     movcs      r11, r9
-     cmp        r9, #3
-     movls      r11, r9
-     cmp        r11, #0
-     beq        .nothingtodo
-     mov        r5, r2
-     mov        r12, r8
-.dlabel5:
-     add        r12, r12, #1
-     vldmia     r6!, {s14}
-     rsb        r4, r8, r12
-     vldmia     r5!, {s13}
-     cmp        r4, r11
-     vmla.f32   s15, s13, s14
-     mov        r7, r6
-     mov        r4, r5
-     bcc        .dlabel5
-     cmp        r9, r11
-     beq        .done
-.dlabel8:
-     rsb        r9, r11, r9
-     lsr        r8, r9, #2
-     lsls       r10, r8, #2
-     beq        .dlabel6
-     lsl        r6, r11, #2
-     veor       q8, q8, q8
-     add        r1, r1, r6
-     add        r6, r2, r6
-     mov        r5, #0
-.dlabel9:
-     add        r5, r5, #1
-     vld1.32    {d20-d21}, [r6]!
-     cmp        r5, r8
-     vld1.64    {d18-d19}, [r1 :64]!
-     vmla.f32   q8, q10, q9
-     bcc        .dlabel9
-     vadd.f32   d16, d16, d17
-     lsl        r2, r10, #2
-     veor       q9, q9, q9
-     add        r7, r7, r2
-     vpadd.f32  d6, d16, d16
-     add        r4, r4, r2
-     cmp        r9, r10
-     add        r12, r12, r10
-     vadd.f32   s15, s15, s12
-     beq        .done
-.dlabel6:
-     mov        r2, r7
-.dlabel7:
-     add        r12, r12, #1
-     vldmia     r2!, {s13}
-     cmp        r3, r12
-     vldmia     r4!, {s14}
-     vmla.f32   s15, s13, s14
-     bhi        .dlabel7
-.done:
-     vstr       s15, [r0]
-     add        r13, r13, #16
-     pop        {r4, r5, r6, r7, r8, r9, r10, r11}
-     bx         lr @ lr is the return address
-.nothingtodo:
-     mov        r12, r8
-     mov        r4, r2
-     mov        r7, r1
-     b          .dlabel8
-
diff --git a/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasm.s 
b/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasm.s
deleted file mode 100644
index 481cade..0000000
--- a/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasm.s
+++ /dev/null
@@ -1,79 +0,0 @@
-@ static inline void volk_32fc_32f_dot_prod_32fc_a_neonasm ( lv_32fc_t* 
result, const  lv_32fc_t* input, const  float* taps, unsigned int num_points) {
-    .global    volk_32fc_32f_dot_prod_32fc_a_neonasm
-    volk_32fc_32f_dot_prod_32fc_a_neonasm:
-    @ r0 - result: pointer to output array (32fc)
-    @ r1 - input: pointer to input array 1 (32fc)
-    @ r2 - taps: pointer to input array 2 (32f)
-    @ r3 - num_points: number of items to process
-    
-    result .req r0
-    input .req r1
-    taps .req r2
-    num_points .req r3
-    quarterPoints .req r7
-    number .req r8
-    @ Note that according to the ARM EABI (AAPCS) Section 5.1.1:
-    @ registers s16-s31 (d8-d15, q4-q7) must be preserved across subroutine 
calls;
-    @ registers s0-s15 (d0-d7, q0-q3) do not need to be preserved
-    @ registers d16-d31 (q8-q15), if present, do not need to be preserved.
-    realAccQ   .req q0 @ d0-d1/s0-s3
-    compAccQ   .req q1 @ d2-d3/s4-s7
-    realAccS   .req s0 @ d0[0]
-    compAccS   .req s4 @ d2[0]
-    tapsVal    .req q2 @ d4-d5
-    outVal     .req q3 @ d6-d7
-    realMul    .req q8 @ d8-d9
-    compMul    .req q9 @ d16-d17
-    inRealVal  .req q10 @ d18-d19
-    inCompVal  .req q11 @ d20-d21
-    
-    stmfd      sp!, {r7, r8, sl}       @ prologue - save register states
-    
-    veor realAccQ, realAccQ @ zero out accumulators
-    veor compAccQ, compAccQ @ zero out accumulators
-    movs quarterPoints, num_points, lsr #2
-    beq .loop2 @ if zero into quarterPoints
-    
-    mov number, quarterPoints
-
-.loop1:
-    @ do work here
-    @pld [taps, #128] @ pre-load hint - this is implementation specific!
-    @pld [input, #128] @ pre-load hint - this is implementation specific!
-    vld1.32 {d4-d5}, [taps:128]! @ tapsVal
-    vld2.32 {d20-d23}, [input:128]! @ inRealVal, inCompVal
-    vmul.f32 realMul, tapsVal, inRealVal
-    vmul.f32 compMul, tapsVal, inCompVal
-    vadd.f32 realAccQ, realAccQ, realMul
-    vadd.f32 compAccQ, compAccQ, compMul
-    subs number, number, #1
-    bne        .loop1  @ first loop
-
-    @ Sum up across realAccQ and compAccQ
-    vpadd.f32 d0, d0, d1      @ realAccQ +-> d0
-    vpadd.f32 d2, d2, d3      @ compAccQ +-> d2
-    vadd.f32 realAccS, s0, s1 @ sum the contents of d0 together (realAccQ)
-    vadd.f32 compAccS, s4, s5 @ sum the contents of d2 together (compAccQ)
-    @ critical values are now in s0 (realAccS), s4 (realAccQ)
-       mov     number, quarterPoints, asl #2
-
-.loop2:
-    cmp        num_points, number
-    bls        .done
-    
-    vld1.32 {d4[0]}, [taps]! @ s8
-    vld2.32 {d5[0],d6[0]}, [input]! @ s10, s12
-    vmul.f32 s5, s8, s10
-    vmul.f32 s6, s8, s12
-    vadd.f32 realAccS, realAccS, s5
-    vadd.f32 compAccS, compAccS, s6
-    
-    add number, number, #1
-    b .loop2
-
-.done:
-    vst1.32 {d0[0]}, [result]! @ realAccS
-    vst1.32 {d2[0]}, [result]  @ compAccS
-
-    ldmfd      sp!, {r7, r8, sl} @ epilogue - restore register states
-    bx lr
diff --git 
a/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasmpipeline.s 
b/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasmpipeline.s
deleted file mode 100644
index aaf70e2..0000000
--- a/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasmpipeline.s
+++ /dev/null
@@ -1,86 +0,0 @@
-@ static inline void volk_32fc_32f_dot_prod_32fc_a_neonasmpipeline ( 
lv_32fc_t* result, const  lv_32fc_t* input, const  float* taps, unsigned int 
num_points) {
-       .global volk_32fc_32f_dot_prod_32fc_a_neonasmpipeline
-volk_32fc_32f_dot_prod_32fc_a_neonasmpipeline:
-       @ r0 - result: pointer to output array (32fc)
-       @ r1 - input: pointer to input array 1 (32fc)
-       @ r2 - taps: pointer to input array 2 (32f)
-       @ r3 - num_points: number of items to process
-
-       result .req r0
-       input .req r1
-       taps .req r2
-       num_points .req r3
-       quarterPoints .req r7
-       number .req r8
-       @ Note that according to the ARM EABI (AAPCS) Section 5.1.1:
-    @ registers s16-s31 (d8-d15, q4-q7) must be preserved across subroutine 
calls;
-    @ registers s0-s15 (d0-d7, q0-q3) do not need to be preserved
-    @ registers d16-d31 (q8-q15), if present, do not need to be preserved.
-       realAccQ   .req q0 @ d0-d1/s0-s3
-       compAccQ   .req q1 @ d2-d3/s4-s7
-       realAccS   .req s0 @ d0[0]
-       compAccS   .req s4 @ d2[0]
-       tapsVal    .req q2 @ d4-d5
-       outVal     .req q3 @ d6-d7
-    realMul    .req q8 @ d8-d9
-    compMul    .req q9 @ d16-d17
-    inRealVal  .req q10 @ d18-d19
-       inCompVal  .req q11 @ d20-d21
-
-       stmfd   sp!, {r7, r8, sl}       @ prologue - save register states
-
-    pld [taps, #128] @ pre-load hint - this is implementation specific!
-       pld [input, #128] @ pre-load hint - this is implementation specific!
-
-       veor realAccQ, realAccQ @ zero out accumulators
-       veor compAccQ, compAccQ @ zero out accumulators
-       movs quarterPoints, num_points, lsr #2
-       beq .loop2 @ if zero into quarterPoints
-
-       mov number, quarterPoints
-       @ Optimizing for pipeline
-       vld1.32 {d4-d5}, [taps:128]! @ tapsVal
-       vld2.32 {d18-d21}, [input:128]! @ inRealVal, inCompVal
-       subs number, number, #1
-
-.loop1:
-       @ do work here
-       pld [taps, #128] @ pre-load hint - this is implementation specific!
-       pld [input, #128] @ pre-load hint - this is implementation specific!
-       vmul.f32 realMul, tapsVal, inRealVal
-       vmul.f32 compMul, tapsVal, inCompVal
-       vadd.f32 realAccQ, realAccQ, realMul
-       vadd.f32 compAccQ, compAccQ, compMul
-       vld1.32 {d4-d5}, [taps:128]! @ tapsVal
-       vld2.32 {d18-d21}, [input:128]! @ inRealVal, inCompVal
-
-       subs number, number, #1
-       bne     .loop1  @ first loop
-
-    @ Sum up across realAccQ and compAccQ
-    vpadd.f32 d0, d0, d1      @ realAccQ +-> d0
-    vpadd.f32 d2, d2, d3      @ compAccQ +-> d2
-    vadd.f32 realAccS, s0, s1 @ sum the contents of d0 together (realAccQ)
-    vadd.f32 compAccS, s4, s5 @ sum the contents of d2 together (compAccQ)
-    @ critical values are now in s0 (realAccS), s4 (realAccQ)
-       mov     number, quarterPoints, asl #2
-.loop2:
-       cmp     num_points, number
-       bls     .done
-
-       vld1.32 {d4[0]}, [taps]! @ s8
-       vld2.32 {d5[0],d6[0]}, [input]! @ s10, s12
-       vmul.f32 s5, s8, s12
-       vmul.f32 s6, s8, s10
-       vadd.f32 realAccS, realAccS, s5
-       vadd.f32 compAccS, compAccS, s6
-
-       add number, number, #1
-       b .loop2
-
-.done:
-    vst1.32 {d0[0]}, [result]! @ realAccS
-    vst1.32 {d2[0]}, [result]  @ compAccS
-
-       ldmfd   sp!, {r7, r8, sl} @ epilogue - restore register states
-       bx      lr
diff --git 
a/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasmvmla.s 
b/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasmvmla.s
deleted file mode 100644
index cb50e4b..0000000
--- a/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_a_neonasmvmla.s
+++ /dev/null
@@ -1,74 +0,0 @@
-@ static inline void volk_32fc_32f_dot_prod_32fc_a_neonasmvmla ( lv_32fc_t* 
result, const  lv_32fc_t* input, const  float* taps, unsigned int num_points)
-       .global volk_32fc_32f_dot_prod_32fc_a_neonasmvmla
-volk_32fc_32f_dot_prod_32fc_a_neonasmvmla:
-       @ r0 - result: pointer to output array (32fc)
-       @ r1 - input: pointer to input array 1 (32fc)
-       @ r2 - taps: pointer to input array 2 (32f)
-       @ r3 - num_points: number of items to process
-
-       result .req r0
-       input .req r1
-       taps .req r2
-       num_points .req r3
-       quarterPoints .req r7
-       number .req r8
-       @ Note that according to the ARM EABI (AAPCS) Section 5.1.1:
-    @ registers s16-s31 (d8-d15, q4-q7) must be preserved across subroutine 
calls;
-    @ registers s0-s15 (d0-d7, q0-q3) do not need to be preserved
-    @ registers d16-d31 (q8-q15), if present, do not need to be preserved.
-       realAccQ   .req q0 @ d0-d1/s0-s3
-       compAccQ   .req q1 @ d2-d3/s4-s7
-       realAccS   .req s0 @ d0[0]
-       compAccS   .req s4 @ d2[0]
-       tapsVal    .req q2 @ d4-d5
-       outVal     .req q3 @ d6-d7
-    realMul    .req q8 @ d8-d9
-    compMul    .req q9 @ d16-d17
-    inRealVal  .req q10 @ d18-d19
-       inCompVal  .req q11 @ d20-d21
-
-       stmfd   sp!, {r7, r8, sl}       @ prologue - save register states
-
-       veor realAccQ, realAccQ @ zero out accumulators
-       veor compAccQ, compAccQ @ zero out accumulators
-       movs quarterPoints, num_points, lsr #2
-       beq .loop2 @ if zero into quarterPoints
-
-       mov number, quarterPoints
-
-.loop1:
-       @ do work here
-       pld [taps, #128] @ pre-load hint - this is implementation specific!
-       pld [input, #128] @ pre-load hint - this is implementation specific!
-       vld1.32 {d4-d5}, [taps:128]! @ tapsVal
-       vld2.32 {d18-d21}, [input:128]! @ inRealVal, inCompVal
-       vmla.f32 realAccQ, tapsVal, inRealVal
-       vmla.f32 compAccQ, tapsVal, inCompVal
-       subs number, number, #1
-       bne     .loop1  @ first loop
-
-    @ Sum up across realAccQ and compAccQ
-    vpadd.f32 d0, d0, d1      @ realAccQ +-> d0
-    vpadd.f32 d2, d2, d3      @ compAccQ +-> d2
-    vadd.f32 realAccS, s0, s1 @ sum the contents of d0 together (realAccQ)
-    vadd.f32 compAccS, s4, s5 @ sum the contents of d2 together (compAccQ)
-    @ critical values are now in s0 (realAccS), s4 (compAccS)
-       mov     number, quarterPoints, asl #2
-.loop2:
-       cmp     num_points, number
-       bls     .done
-
-       vld1.32 {d4[0]}, [taps]! @ s8
-       vld2.32 {d5[0],d6[0]}, [input]! @ s10, s12
-       vmla.f32 realAccS, s8, s10 @ d0[0]
-       vmla.f32 compAccS, s8, s12 @ d2[0]
-
-       add number, number, #1
-       b .loop2
-
-.done:
-    vst1.32 {d0[0]}, [result]! @ realAccS
-    vst1.32 {d2[0]}, [result]  @ compAccS
-
-       ldmfd   sp!, {r7, r8, sl} @ epilogue - restore register states
-       bx      lr
diff --git a/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_unrollasm.s 
b/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_unrollasm.s
deleted file mode 100644
index 7185ab9..0000000
--- a/volk/kernels/volk/asm/neon/volk_32fc_32f_dot_prod_32fc_unrollasm.s
+++ /dev/null
@@ -1,146 +0,0 @@
-@ static inline void volk_32fc_32f_dot_prod_32fc_unrollasm ( lv_32fc_t* 
result, const  lv_32fc_t* input, const  float* taps, unsigned int num_points)
-.global        volk_32fc_32f_dot_prod_32fc_unrollasm
-volk_32fc_32f_dot_prod_32fc_unrollasm:
-       @ r0 - result: pointer to output array (32fc)
-       @ r1 - input: pointer to input array 1 (32fc)
-       @ r2 - taps: pointer to input array 2 (32f)
-       @ r3 - num_points: number of items to process
-
-    push    {r4, r5, r6, r7, r8, r9}
-    vpush   {q4-q7}
-    sub     r13, r13, #56   @ 0x38
-    add     r12, r13, #8
-    lsrs    r8, r3, #3
-    veor.32 q2, q5, q5
-    veor.32 q3, q5, q5
-    veor.32 q4, q5, q5
-    veor.32 q5, q5, q5
-    beq     .smallvector 
-    vld2.32 {d20-d23}, [r1]!
-    vld1.32 {d24-d25}, [r2]!
-    mov     r5, #1
-
-
-
-.mainloop:
-    vld2.32 {d14-d17}, [r1]!  @ q7,q8
-    vld1.32 {d18-d19}, [r2]!  @ q9
-
-    vmul.f32        q0, q12, q10 @ real mult
-    vmul.f32        q1, q12, q11 @ imag mult
-
-    add     r5, r5, #1
-    cmp     r5, r8
-
-    vadd.f32        q4, q4, q0@ q4 accumulates real
-    vadd.f32        q5, q5, q1@ q5 accumulates imag
-
-    vld2.32 {d20-d23}, [r1]!  @ q10-q11
-    vld1.32 {d24-d25}, [r2]!  @ q12
-
-    vmul.f32        q13, q9, q7
-    vmul.f32        q14, q9, q8
-    vadd.f32        q2, q2, q13  @ q2 accumulates real
-    vadd.f32        q3, q3, q14  @ q3 accumulates imag
-
-
-
-    bne     .mainloop 
-
-    vmul.f32        q0, q12, q10 @ real mult
-    vmul.f32        q1, q12, q11 @ imag mult
-
-    vadd.f32        q4, q4, q0@ q4 accumulates real
-    vadd.f32        q5, q5, q1@ q5 accumulates imag
-
-
-.smallvector:
-    vadd.f32        q0, q2, q4
-    add     r12, r13, #24
-    lsl     r8, r8, #3
-    vadd.f32        q1, q3, q5
-    cmp     r3, r8
-
-    vadd.f32    d0, d0, d1
-    vadd.f32    d1, d2, d3
-    vadd.f32    s14, s0, s1
-    vadd.f32    s15, s2, s3
-
-    vstr    s14, [r13]
-    vstr    s15, [r13, #4]
-    bls     .D1 
-    rsb     r12, r8, r3
-    lsr     r4, r12, #2
-    cmp     r4, #0
-    cmpne   r12, #3
-    lsl     r5, r4, #2
-    movhi   r6, #0
-    movls   r6, #1
-    bls     .L1 
-    vmov.i32        q10, #0 @ 0x00000000
-    mov     r9, r1
-    mov     r7, r2
-    vorr    q11, q10, q10
-
-.smallloop:
-    add     r6, r6, #1
-    vld2.32 {d16-d19}, [r9]!
-    cmp     r4, r6
-    vld1.32 {d24-d25}, [r7]!
-    vmla.f32        q11, q12, q8
-    vmla.f32        q10, q12, q9
-    bhi     .smallloop 
-    vmov.i32        q9, #0  @ 0x00000000
-    cmp     r12, r5
-    vadd.f32        d20, d20, d21
-    add     r8, r8, r5
-    vorr    q8, q9, q9
-    add     r1, r1, r5, lsl #3
-    vadd.f32        d22, d22, d23
-    add     r2, r2, r5, lsl #2
-    vpadd.f32       d18, d20, d20
-    vpadd.f32       d16, d22, d22
-    vmov.32 r4, d18[0]
-    vmov.32 r12, d16[0]
-    vmov    s13, r4
-    vadd.f32        s15, s13, s15
-    vmov    s13, r12
-    vadd.f32        s14, s13, s14
-    beq     .finishreduction 
-    .L1: 
-    add     r12, r8, #1
-    vldr    s13, [r2]
-    cmp     r3, r12
-    vldr    s11, [r1]
-    vldr    s12, [r1, #4]
-    vmla.f32        s14, s13, s11
-    vmla.f32        s15, s13, s12
-    bls     .finishreduction 
-    add     r8, r8, #2
-    vldr    s13, [r2, #4]
-    cmp     r3, r8
-    vldr    s11, [r1, #8]
-    vldr    s12, [r1, #12]
-    vmla.f32        s14, s13, s11
-    vmla.f32        s15, s13, s12
-    bls     .finishreduction 
-    vldr    s13, [r2, #8]
-    vldr    s11, [r1, #16]
-    vldr    s12, [r1, #20]
-    vmla.f32        s14, s13, s11
-    vmla.f32        s15, s13, s12
-
-.finishreduction:
-    vstr    s14, [r13]
-    vstr    s15, [r13, #4]
-    .D1:
-    ldr     r3, [r13]
-    str     r3, [r0]
-    ldr     r3, [r13, #4]
-    str     r3, [r0, #4]
-    add     r13, r13, #56   @ 0x38
-    vpop    {q4-q7}
-    pop     {r4, r5, r6, r7, r8, r9}
-    bx      r14
-
-
diff --git a/volk/kernels/volk/asm/neon/volk_32fc_x2_dot_prod_32fc_neonasm.s 
b/volk/kernels/volk/asm/neon/volk_32fc_x2_dot_prod_32fc_neonasm.s
deleted file mode 100644
index a1c5b7f..0000000
--- a/volk/kernels/volk/asm/neon/volk_32fc_x2_dot_prod_32fc_neonasm.s
+++ /dev/null
@@ -1,98 +0,0 @@
-@ static inline void volk_32fc_x2_dot_prod_32fc_neonasm(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points);
-       .global volk_32fc_x2_dot_prod_32fc_neonasm
-volk_32fc_x2_dot_prod_32fc_neonasm:
-    push    {r4, r5, r6, r7, r8, lr}
-    vpush   {q0-q7}
-    vpush   {q8-q15}
-    mov r8, r3          @ hold on to num_points (r8)
-    @ zero out accumulators -- leave 1 reg in alu
-    veor    q8, q15, q15
-    mov r7, r0          @ (r7) is cVec
-    veor    q9, q15, q15
-    mov r5, r1          @ (r5) is aVec
-    veor    q10, q15, q15
-    mov r6, r2          @ (r6) is bVec
-    veor    q11, q15, q15
-    lsrs    r3, r3, #3  @ eighth_points (r3) = num_points/8
-    veor    q12, q15, q15
-    mov r12, r2         @ (r12) is bVec
-    veor    q13, q15, q15
-    mov r4, r1          @ (r4) is aVec
-    veor    q14, q15, q15
-    veor    q15, q15, q15
-    beq .smallvector @ nathan optimized this file based on an objdump
-    @ but I don't understand this jump. Seems like it should go to loop2
-    @ and smallvector (really vector reduction) shouldn't need to be a label
-    mov r2, #0          @ 0 out r2 (now number)
-.loop1:
-    add r2, r2, #1      @ increment number
-    vld4.32 {d0,d2,d4,d6}, [r12]! @ q0-q3
-    cmp r2, r3          @ is number < eighth_points
-    @pld [r12, #64]   
-    vld4.32 {d8,d10,d12,d14}, [r4]! @ q4-q7
-    @pld [r4, #64]  
-    vmla.f32    q12, q4, q0 @ real (re*re)
-    vmla.f32    q14, q4, q1 @ imag (re*im)
-    vmls.f32    q15, q5, q1 @ real (im*im)
-    vmla.f32    q13, q5, q0 @ imag (im*re)
-
-    vmla.f32    q8, q2, q6 @ real (re*re)
-    vmla.f32    q9, q2, q7 @ imag (re*im)
-    vmls.f32    q10, q3, q7 @ real (im*im)
-    vmla.f32    q11, q3, q6 @ imag (im*re)
-    bne .loop1
-    lsl r2, r3, #3      @ r2 = eighth_points * 8
-    add r6, r6, r2      @ bVec = bVec + eighth_points -- whyyyyy gcc?!?
-    add r5, r5, r2      @ aVec = aVec + eighth_points
-    @ q12-q13 were original real accumulators
-    @ q14-q15 were original imag accumulators
-    @ reduce 8 accumulators down to 2 (1 real, 1 imag)
-    vadd.f32    q8, q10, q8 @ real + real
-    vadd.f32    q11, q11, q9 @ imag + imag
-    vadd.f32    q12, q12, q15 @ real + real
-    vadd.f32    q14, q14, q13 @ imag + imag
-    vadd.f32    q8, q8, q12
-    vadd.f32    q9, q9, q14
-.smallvector:
-    lsl r4, r3, #3
-    cmp r8, r4
-    vst2.32 {d16-d19}, [sp :64] @ whaaaaat? no way this is necessary!
-    vldr    s15, [sp, #8]
-    vldr    s17, [sp]
-    vldr    s16, [sp, #4]
-    vadd.f32    s17, s17, s15
-    vldr    s11, [sp, #12]
-    vldr    s12, [sp, #24]
-    vldr    s13, [sp, #28]
-    vldr    s14, [sp, #16]
-    vldr    s15, [sp, #20]
-    vadd.f32    s16, s16, s11
-    vadd.f32    s17, s17, s12
-    vadd.f32    s16, s16, s13
-    vadd.f32    s17, s17, s14
-    vadd.f32    s16, s16, s15
-    vstr    s17, [r7]
-    vstr    s16, [r7, #4]
-    bls .done
-.loop2:
-    mov r3, r6
-    add r6, r6, #8
-    vldr    s0, [r3]
-    vldr    s1, [r6, #-4]
-    mov r3, r5
-    add r5, r5, #8
-    vldr    s2, [r3]
-    vldr    s3, [r5, #-4]
-    bl  __mulsc3            @ GCC/Clang built-in. Portability?
-    add r4, r4, #1
-    cmp r4, r8
-    vadd.f32    s17, s17, s0
-    vadd.f32    s16, s16, s1
-    vstr    s17, [r7]
-    vstr    s16, [r7, #4]
-    bne .loop2
-.done: 
-    vpop    {q8-q15}
-    vpop    {q0-q7}
-    pop {r4, r5, r6, r7, r8, pc}
-
diff --git 
a/volk/kernels/volk/asm/neon/volk_32fc_x2_dot_prod_32fc_neonasm_opttests.s 
b/volk/kernels/volk/asm/neon/volk_32fc_x2_dot_prod_32fc_neonasm_opttests.s
deleted file mode 100644
index 77f026e..0000000
--- a/volk/kernels/volk/asm/neon/volk_32fc_x2_dot_prod_32fc_neonasm_opttests.s
+++ /dev/null
@@ -1,96 +0,0 @@
-@ static inline void volk_32fc_x2_dot_prod_32fc_neonasm_opttests(float* 
cVector, const float* aVector, const float* bVector, unsigned int num_points)@
-.global        volk_32fc_x2_dot_prod_32fc_neonasm_opttests
-volk_32fc_x2_dot_prod_32fc_neonasm_opttests:
-    push    {r4, r5, r6, r7, r8, r9, sl, fp, lr}
-    vpush   {d8-d15}
-    lsrs    fp, r3, #3
-    sub     sp, sp, #52     @ 0x34
-    mov     r9, r3
-    mov     sl, r0
-    mov     r7, r1
-    mov     r8, r2
-    vorr    q0, q7, q7
-    vorr    q1, q7, q7
-    vorr    q2, q7, q7
-    vorr    q3, q7, q7
-    vorr    q4, q7, q7
-    vorr    q5, q7, q7
-    veor    q6, q7, q7
-    vorr    q7, q7, q7
-    beq     .smallvector 
-    mov     r4, r1
-    mov     ip, r2
-    mov     r3, #0
-.mainloop:
-    @mov     r6, ip
-    @mov     r5, r4
-    vld4.32 {d24,d26,d28,d30}, [r6]!
-    @add     ip, ip, #64     @ 0x40
-    @add     r4, r4, #64     @ 0x40
-    vld4.32 {d16,d18,d20,d22}, [r5]!
-    add     r3, r3, #1
-    vld4.32 {d25,d27,d29,d31}, [r6]!
-    vld4.32 {d17,d19,d21,d23}, [r5]!
-    vmla.f32        q6, q8, q12
-    vmla.f32        q0, q9, q12
-    cmp     r3, fp
-    vmls.f32        q5, q13, q9
-    vmla.f32        q2, q13, q8
-    vmla.f32        q7, q10, q14
-    vmla.f32        q1, q11, q14
-    vmls.f32        q4, q15, q11
-    vmla.f32        q3, q15, q10
-    bne     .mainloop 
-    lsl     r3, fp, #6
-    add     r8, r8, r3
-    add     r7, r7, r3
-.smallvector:
-    vadd.f32        q3, q2, q3
-    add     r3, sp, #16
-    lsl     r4, fp, #3
-    vadd.f32        q4, q5, q4
-    cmp     r9, r4
-    vadd.f32        q6, q6, q7
-    vadd.f32        q1, q0, q1
-    vadd.f32        q8, q6, q4
-    vadd.f32        q9, q1, q3
-    vst2.32 {d16-d19}, [r3 :64]
-    vldr    s15, [sp, #24]
-    vldr    s16, [sp, #16]
-    vldr    s17, [sp, #20]
-    vadd.f32        s16, s16, s15
-    vldr    s11, [sp, #28]
-    vldr    s12, [sp, #40]  @ 0x28
-    vldr    s13, [sp, #44]  @ 0x2c
-    vldr    s14, [sp, #32]
-    vldr    s15, [sp, #36]  @ 0x24
-    vadd.f32        s17, s17, s11
-    vadd.f32        s16, s16, s12
-    vadd.f32        s17, s17, s13
-    vadd.f32        s16, s16, s14
-    vadd.f32        s17, s17, s15
-    vstr    s16, [sl]
-    vstr    s17, [sl, #4]
-    bls     .epilog 
-    add     r5, sp, #8
-.tailcase:
-    ldr     r3, [r7], #8
-    mov     r0, r5
-    ldr     r1, [r8], #8
-    add     r4, r4, #1
-    ldr     ip, [r7, #-4]
-    ldr     r2, [r8, #-4]
-    str     ip, [sp]
-    bl      __mulsc3
-    vldr    s14, [sp, #8]
-    vldr    s15, [sp, #12]
-    vadd.f32        s16, s16, s14
-    cmp     r4, r9
-    vadd.f32        s17, s17, s15
-    vstr    s16, [sl]
-    vstr    s17, [sl, #4]
-    bne     .tailcase 
-.epilog:
-    add     sp, sp, #52     @ 0x34
-    vpop    {d8-d15}
-    pop     {r4, r5, r6, r7, r8, r9, sl, fp, pc}
diff --git a/volk/kernels/volk/asm/neon/volk_32fc_x2_multiply_32fc_neonasm.s 
b/volk/kernels/volk/asm/neon/volk_32fc_x2_multiply_32fc_neonasm.s
deleted file mode 100644
index 5d79b46..0000000
--- a/volk/kernels/volk/asm/neon/volk_32fc_x2_multiply_32fc_neonasm.s
+++ /dev/null
@@ -1,45 +0,0 @@
-@ static inline void volk_32fc_x2_multiply_32fc_neonasm(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points);
-       .global volk_32fc_x2_multiply_32fc_neonasm
-volk_32fc_x2_multiply_32fc_neonasm:
-    push    {r4, r5, r6, r7, r8, r9, r14}
-    lsrs    r7, r3, #2
-    @ r0 is c vector
-    @ r1 is a vector
-    @ r2 is b vector
-    @ r3 is num_points
-    @ r7 is quarter_points
-    beq     .smallvector
-    mov     r5, #0
-.mainloop:
-   vld2.32   {d24-d27}, [r1]!  @ ar=q12, ai=q13
-   add       r5, r5, #1
-   cmp       r5, r7
-   vld2.32   {d20-d23}, [r2]!  @ br=q10, bi=q11
-   pld       [r1]
-   pld       [r2]
-   vmul.f32  q0, q12, q10 @ q15 = ar*br
-   vmul.f32  q1, q13, q11 @ q11 = ai*bi
-   vmul.f32  q2, q12, q11 @ q14 = ar*bi
-   vmul.f32  q3, q13, q10 @ q12 = ai*br
-   vsub.f32  q8, q0, q1  @ real
-   vadd.f32  q9, q2, q3  @ imag
-   vst2.32   {d16-d19}, [r0]!
-   bne     .mainloop 
-
-.smallvector:
-   lsl     r5, r7, #2
-   cmp     r3, r7
-   bls     .done
-.tailcase:
-   add    r5, r5, #1
-   vld1.32    d1, [r1]! @ s2, s3 = ar, ai
-   vld1.32    d0, [r2]! @ s0, s1 = br, bi
-   vmul.f32   s4, s0, s2 @ s4 = ar*br
-   vmul.f32   s5, s0, s3 @ s5 = ar*bi
-   vmls.f32   s4, s1, s3 @ s4 = s4 - ai*bi
-   vmla.f32   s5, s1, s2 @ s5 = s5 + ai*br
-   vst1.32    d2, [r0]!
-   cmp     r3, r5
-   bne     .tailcase 
-.done:
-   pop     {r4, r5, r6, r7, r8, r9, r15}
diff --git 
a/volk/kernels/volk/asm/neon/volk_arm_32fc_32f_dot_prod_32fc_a_neonpipeline.s 
b/volk/kernels/volk/asm/neon/volk_arm_32fc_32f_dot_prod_32fc_a_neonpipeline.s
deleted file mode 100644
index 758e743..0000000
--- 
a/volk/kernels/volk/asm/neon/volk_arm_32fc_32f_dot_prod_32fc_a_neonpipeline.s
+++ /dev/null
@@ -1,92 +0,0 @@
-@ static inline void volk_32fc_32f_dot_prod_32fc_a_neonpipeline ( lv_32fc_t* 
result, const  lv_32fc_t* input, const  float* taps, unsigned int num_points) {
-       .global volk_32fc_32f_dot_prod_32fc_a_neonpipeline
-volk_32fc_32f_dot_prod_32fc_a_neonpipeline:
-    @ r0 - result: pointer to output array (32fc)
-    @ r1 - input: pointer to input array 1 (32fc)
-    @ r2 - taps: pointer to input array 2 (32f)
-    @ r3 - num_points: number of items to process
-    
-    result .req r0
-    input .req r1
-    taps .req r2
-    num_points .req r3
-    quarterPoints .req r7
-    number .req r8
-    @ Note that according to the ARM EABI (AAPCS) Section 5.1.1:
-    @ registers s16-s31 (d8-d15, q4-q7) must be preserved across subroutine 
calls;
-    @ registers s0-s15 (d0-d7, q0-q3) do not need to be preserved
-    @ registers d16-d31 (q8-q15), if present, do not need to be preserved.
-    realAccQ   .req q0 @ d0-d1/s0-s3
-    compAccQ   .req q1 @ d2-d3/s4-s7
-    realAccS   .req s0 @ d0[0]
-    compAccS   .req s4 @ d2[0]
-    tapsVal    .req q2 @ d4-d5
-    outVal     .req q3 @ d6-d7
-    realMul    .req q8 @ d8-d9
-    compMul    .req q9 @ d16-d17
-    inRealVal  .req q10 @ d18-d19
-    inCompVal  .req q11 @ d20-d21
-    
-    stmfd      sp!, {r7, r8, sl}       @ prologue - save register states
-    
-    pld [taps, #128] @ pre-load hint - this is implementation specific!
-    pld [input, #128] @ pre-load hint - this is implementation specific!
-    
-    veor realAccQ, realAccQ @ zero out accumulators
-    veor compAccQ, compAccQ @ zero out accumulators
-    movs quarterPoints, num_points, lsr #2
-    beq .loop2 @ if zero into quarterPoints
-    
-    mov number, quarterPoints
-    @ Optimizing for pipeline
-    vld1.32 {d4-d5}, [taps:128]! @ tapsVal
-    vld2.32 {d20-d23}, [input:128]! @ inRealVal, inCompVal
-    subs number, number, #1
-
-.loop1:
-       @ do work here
-       pld [taps, #128] @ pre-load hint - this is implementation specific!
-       pld [input, #128] @ pre-load hint - this is implementation specific!
-       vmul.f32 realMul, tapsVal, inRealVal
-       vmul.f32 compMul, tapsVal, inCompVal
-       vadd.f32 realAccQ, realAccQ, realMul
-       vadd.f32 compAccQ, compAccQ, compMul
-       vld1.32 {d4-d5}, [taps:128]! @ tapsVal
-       vld2.32 {d20-d23}, [input:128]! @ inRealVal, inCompVal
-
-       subs number, number, #1
-       bne     .loop1  @ first loop
-
-       vmul.f32 realMul, tapsVal, inRealVal
-       vmul.f32 compMul, tapsVal, inCompVal
-       vadd.f32 realAccQ, realAccQ, realMul
-       vadd.f32 compAccQ, compAccQ, compMul
-
-    @ Sum up across realAccQ and compAccQ
-    vpadd.f32 d0, d0, d1      @ realAccQ +-> d0
-    vpadd.f32 d2, d2, d3      @ compAccQ +-> d2
-    vadd.f32 realAccS, s0, s1 @ sum the contents of d0 together (realAccQ)
-    vadd.f32 compAccS, s4, s5 @ sum the contents of d2 together (compAccQ)
-    @ critical values are now in s0 (realAccS), s4 (realAccQ)
-       mov     number, quarterPoints, asl #2
-       sub number, number, #5
-.loop2:
-       cmp     num_points, number
-       bls     .done
-
-       vld1.32 {d4[0]}, [taps]! @ s8
-       vld2.32 {d5[0],d6[0]}, [input]! @ s10, s12
-       vmul.f32 s5, s8, s10
-       vmul.f32 s6, s8, s12
-       vadd.f32 realAccS, realAccS, s5
-       vadd.f32 compAccS, compAccS, s6
-
-       add number, number, #1
-       b .loop2
-
-.done:
-    vst1.32 {d0[0]}, [result]! @ realAccS
-    vst1.32 {d2[0]}, [result]  @ compAccS
-
-       ldmfd   sp!, {r7, r8, sl} @ epilogue - restore register states
-       bx      lr
diff --git a/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h 
b/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
deleted file mode 100644
index 2656d76..0000000
--- a/volk/kernels/volk/volk_16i_32fc_dot_prod_32fc.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_32fc_dot_prod_32fc_H
-#define INCLUDED_volk_16i_32fc_dot_prod_32fc_H
-
-#include <volk/volk_common.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-static inline void volk_16i_32fc_dot_prod_32fc_generic(lv_32fc_t* result, 
const short* input, const lv_32fc_t * taps, unsigned int num_points) {
-
-  static const int N_UNROLL = 4;
-
-  lv_32fc_t acc0 = 0;
-  lv_32fc_t acc1 = 0;
-  lv_32fc_t acc2 = 0;
-  lv_32fc_t acc3 = 0;
-
-  unsigned i = 0;
-  unsigned n = (num_points / N_UNROLL) * N_UNROLL;
-
-  for(i = 0; i < n; i += N_UNROLL) {
-    acc0 += taps[i + 0] * (float)input[i + 0];
-    acc1 += taps[i + 1] * (float)input[i + 1];
-    acc2 += taps[i + 2] * (float)input[i + 2];
-    acc3 += taps[i + 3] * (float)input[i + 3];
-  }
-
-  for(; i < num_points; i++) {
-    acc0 += taps[i] * (float)input[i];
-  }
-
-  *result = acc0 + acc1 + acc2 + acc3;
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_16i_32fc_dot_prod_32fc_neon(lv_32fc_t* result, const 
short* input, const lv_32fc_t * taps, unsigned int num_points) {
-
-  unsigned ii;
-  unsigned quarter_points = num_points / 4;
-  lv_32fc_t* tapsPtr = (lv_32fc_t*) taps;
-  short* inputPtr = (short*) input;
-  lv_32fc_t accumulator_vec[4];
-
-  float32x4x2_t tapsVal, accumulator_val;
-  int16x4_t input16;
-  int32x4_t input32;
-  float32x4_t input_float, prod_re, prod_im;
-
-  accumulator_val.val[0] = vdupq_n_f32(0.0);
-  accumulator_val.val[1] = vdupq_n_f32(0.0);
-
-  for(ii = 0; ii < quarter_points; ++ii) {
-    tapsVal = vld2q_f32((float*)tapsPtr);
-    input16 = vld1_s16(inputPtr);
-    // widen 16-bit int to 32-bit int
-    input32 = vmovl_s16(input16);
-    // convert 32-bit int to float with scale
-    input_float = vcvtq_f32_s32(input32);
-
-    prod_re = vmulq_f32(input_float, tapsVal.val[0]);
-    prod_im = vmulq_f32(input_float, tapsVal.val[1]);
-
-    accumulator_val.val[0] = vaddq_f32(prod_re, accumulator_val.val[0]);
-    accumulator_val.val[1] = vaddq_f32(prod_im, accumulator_val.val[1]);
-
-    tapsPtr += 4;
-    inputPtr += 4;
-  }
-  vst2q_f32((float*)accumulator_vec, accumulator_val);
-  accumulator_vec[0] += accumulator_vec[1];
-  accumulator_vec[2] += accumulator_vec[3];
-  accumulator_vec[0] += accumulator_vec[2];
-
-  for(ii = quarter_points * 4; ii < num_points; ++ii) {
-    accumulator_vec[0] += *(tapsPtr++) * (float)(*(inputPtr++));
-  }
-
-  *result = accumulator_vec[0];
-}
-
-#endif /*LV_HAVE_NEON*/
-
-#if LV_HAVE_SSE && LV_HAVE_MMX
-
-static inline void volk_16i_32fc_dot_prod_32fc_u_sse( lv_32fc_t* result, const 
 short* input, const  lv_32fc_t* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 8;
-
-  float res[2];
-  float *realpt = &res[0], *imagpt = &res[1];
-  const short* aPtr = input;
-  const float* bPtr = (float*)taps;
-
-  __m64  m0, m1;
-  __m128 f0, f1, f2, f3;
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    m0 = _mm_set_pi16(*(aPtr+3), *(aPtr+2), *(aPtr+1), *(aPtr+0));
-    m1 = _mm_set_pi16(*(aPtr+7), *(aPtr+6), *(aPtr+5), *(aPtr+4));
-    f0 = _mm_cvtpi16_ps(m0);
-    f1 = _mm_cvtpi16_ps(m0);
-    f2 = _mm_cvtpi16_ps(m1);
-    f3 = _mm_cvtpi16_ps(m1);
-
-    a0Val = _mm_unpacklo_ps(f0, f1);
-    a1Val = _mm_unpackhi_ps(f0, f1);
-    a2Val = _mm_unpacklo_ps(f2, f3);
-    a3Val = _mm_unpackhi_ps(f2, f3);
-
-    b0Val = _mm_loadu_ps(bPtr);
-    b1Val = _mm_loadu_ps(bPtr+4);
-    b2Val = _mm_loadu_ps(bPtr+8);
-    b3Val = _mm_loadu_ps(bPtr+12);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 8;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  *realpt = dotProductVector[0];
-  *imagpt = dotProductVector[1];
-  *realpt += dotProductVector[2];
-  *imagpt += dotProductVector[3];
-
-  number = sixteenthPoints*8;
-  for(;number < num_points; number++){
-    *realpt += ((*aPtr)   * (*bPtr++));
-    *imagpt += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-
-#endif /*LV_HAVE_SSE && LV_HAVE_MMX*/
-
-
-
-
-#if LV_HAVE_SSE && LV_HAVE_MMX
-
-
-static inline void volk_16i_32fc_dot_prod_32fc_a_sse( lv_32fc_t* result, const 
 short* input, const  lv_32fc_t* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 8;
-
-  float res[2];
-  float *realpt = &res[0], *imagpt = &res[1];
-  const short* aPtr = input;
-  const float* bPtr = (float*)taps;
-
-  __m64  m0, m1;
-  __m128 f0, f1, f2, f3;
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    m0 = _mm_set_pi16(*(aPtr+3), *(aPtr+2), *(aPtr+1), *(aPtr+0));
-    m1 = _mm_set_pi16(*(aPtr+7), *(aPtr+6), *(aPtr+5), *(aPtr+4));
-    f0 = _mm_cvtpi16_ps(m0);
-    f1 = _mm_cvtpi16_ps(m0);
-    f2 = _mm_cvtpi16_ps(m1);
-    f3 = _mm_cvtpi16_ps(m1);
-
-    a0Val = _mm_unpacklo_ps(f0, f1);
-    a1Val = _mm_unpackhi_ps(f0, f1);
-    a2Val = _mm_unpacklo_ps(f2, f3);
-    a3Val = _mm_unpackhi_ps(f2, f3);
-
-    b0Val = _mm_load_ps(bPtr);
-    b1Val = _mm_load_ps(bPtr+4);
-    b2Val = _mm_load_ps(bPtr+8);
-    b3Val = _mm_load_ps(bPtr+12);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 8;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  *realpt = dotProductVector[0];
-  *imagpt = dotProductVector[1];
-  *realpt += dotProductVector[2];
-  *imagpt += dotProductVector[3];
-
-  number = sixteenthPoints*8;
-  for(;number < num_points; number++){
-    *realpt += ((*aPtr)   * (*bPtr++));
-    *imagpt += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-
-#endif /*LV_HAVE_SSE && LV_HAVE_MMX*/
-
-
-#endif /*INCLUDED_volk_16i_32fc_dot_prod_32fc_H*/
diff --git a/volk/kernels/volk/volk_16i_branch_4_state_8.h 
b/volk/kernels/volk/volk_16i_branch_4_state_8.h
deleted file mode 100644
index 4477245..0000000
--- a/volk/kernels/volk/volk_16i_branch_4_state_8.h
+++ /dev/null
@@ -1,216 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_branch_4_state_8_a_H
-#define INCLUDED_volk_16i_branch_4_state_8_a_H
-
-
-#include<inttypes.h>
-#include<stdio.h>
-
-
-
-
-#ifdef LV_HAVE_SSSE3
-
-#include<xmmintrin.h>
-#include<emmintrin.h>
-#include<tmmintrin.h>
-
-static inline  void volk_16i_branch_4_state_8_a_ssse3(short* target,  short* 
src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
-
-
-  __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, 
xmm11;
-
-  __m128i *p_target, *p_src0, *p_cntl2, *p_cntl3, *p_scalars;
-
-
-
-  p_target = (__m128i*)target;
-  p_src0 = (__m128i*)src0;
-  p_cntl2 = (__m128i*)cntl2;
-  p_cntl3 = (__m128i*)cntl3;
-  p_scalars = (__m128i*)scalars;
-
-  int i = 0;
-
-  int bound = 1;
-
-
-  xmm0 = _mm_load_si128(p_scalars);
-
-  xmm1 = _mm_shufflelo_epi16(xmm0, 0);
-  xmm2 = _mm_shufflelo_epi16(xmm0, 0x55);
-  xmm3 = _mm_shufflelo_epi16(xmm0, 0xaa);
-  xmm4 = _mm_shufflelo_epi16(xmm0, 0xff);
-
-  xmm1 = _mm_shuffle_epi32(xmm1, 0x00);
-  xmm2 = _mm_shuffle_epi32(xmm2, 0x00);
-  xmm3 = _mm_shuffle_epi32(xmm3, 0x00);
-  xmm4 = _mm_shuffle_epi32(xmm4, 0x00);
-
-  xmm0 = _mm_load_si128((__m128i*)permuters[0]);
-  xmm6 = _mm_load_si128((__m128i*)permuters[1]);
-  xmm8 = _mm_load_si128((__m128i*)permuters[2]);
-  xmm10 = _mm_load_si128((__m128i*)permuters[3]);
-
-  for(; i < bound; ++i) {
-
-    xmm5 = _mm_load_si128(p_src0);
-
-
-
-
-
-
-
-
-
-    xmm0 = _mm_shuffle_epi8(xmm5, xmm0);
-    xmm6 = _mm_shuffle_epi8(xmm5, xmm6);
-    xmm8 = _mm_shuffle_epi8(xmm5, xmm8);
-    xmm10 = _mm_shuffle_epi8(xmm5, xmm10);
-
-    p_src0 += 4;
-
-
-    xmm5 = _mm_add_epi16(xmm1, xmm2);
-
-    xmm6 = _mm_add_epi16(xmm2, xmm6);
-    xmm8 = _mm_add_epi16(xmm1, xmm8);
-
-
-    xmm7 = _mm_load_si128(p_cntl2);
-    xmm9 = _mm_load_si128(p_cntl3);
-
-    xmm0 = _mm_add_epi16(xmm5, xmm0);
-
-
-    xmm7 = _mm_and_si128(xmm7, xmm3);
-    xmm9 = _mm_and_si128(xmm9, xmm4);
-
-    xmm5 = _mm_load_si128(&p_cntl2[1]);
-    xmm11 = _mm_load_si128(&p_cntl3[1]);
-
-    xmm7 = _mm_add_epi16(xmm7, xmm9);
-
-    xmm5 = _mm_and_si128(xmm5, xmm3);
-    xmm11 = _mm_and_si128(xmm11, xmm4);
-
-    xmm0 = _mm_add_epi16(xmm0, xmm7);
-
-
-
-    xmm7 = _mm_load_si128(&p_cntl2[2]);
-    xmm9 = _mm_load_si128(&p_cntl3[2]);
-
-    xmm5 = _mm_add_epi16(xmm5, xmm11);
-
-    xmm7 = _mm_and_si128(xmm7, xmm3);
-    xmm9 = _mm_and_si128(xmm9, xmm4);
-
-    xmm6 = _mm_add_epi16(xmm6, xmm5);
-
-
-    xmm5 = _mm_load_si128(&p_cntl2[3]);
-    xmm11 = _mm_load_si128(&p_cntl3[3]);
-
-    xmm7 = _mm_add_epi16(xmm7, xmm9);
-
-    xmm5 = _mm_and_si128(xmm5, xmm3);
-    xmm11 = _mm_and_si128(xmm11, xmm4);
-
-    xmm8 = _mm_add_epi16(xmm8, xmm7);
-
-    xmm5 = _mm_add_epi16(xmm5, xmm11);
-
-    _mm_store_si128(p_target, xmm0);
-    _mm_store_si128(&p_target[1], xmm6);
-
-    xmm10 = _mm_add_epi16(xmm5, xmm10);
-
-    _mm_store_si128(&p_target[2], xmm8);
-
-    _mm_store_si128(&p_target[3], xmm10);
-
-    p_target += 3;
-  }
-}
-
-
-#endif /*LV_HAVE_SSEs*/
-
-#ifdef LV_HAVE_GENERIC
-static inline  void volk_16i_branch_4_state_8_generic(short* target,  short* 
src0, char** permuters, short* cntl2, short* cntl3, short* scalars) {
-       int i = 0;
-
-       int bound = 4;
-
-       for(; i < bound; ++i) {
-         target[i* 8] = src0[((char)permuters[i][0])/2]
-           + ((i + 1)%2  * scalars[0])
-           + (((i >> 1)^1) * scalars[1])
-           + (cntl2[i * 8] & scalars[2])
-           + (cntl3[i * 8] & scalars[3]);
-         target[i* 8 + 1] = src0[((char)permuters[i][1 * 2])/2]
-           + ((i + 1)%2  * scalars[0])
-           + (((i >> 1)^1) * scalars[1])
-           + (cntl2[i * 8 + 1] & scalars[2])
-           + (cntl3[i * 8 + 1] & scalars[3]);
-         target[i* 8 + 2] = src0[((char)permuters[i][2 * 2])/2]
-           + ((i + 1)%2  * scalars[0])
-           + (((i >> 1)^1) * scalars[1])
-           + (cntl2[i * 8 + 2] & scalars[2])
-           + (cntl3[i * 8 + 2] & scalars[3]);
-         target[i* 8 + 3] = src0[((char)permuters[i][3 * 2])/2]
-           + ((i + 1)%2  * scalars[0])
-           + (((i >> 1)^1) * scalars[1])
-           + (cntl2[i * 8 + 3] & scalars[2])
-           + (cntl3[i * 8 + 3] & scalars[3]);
-         target[i* 8 + 4] = src0[((char)permuters[i][4 * 2])/2]
-           + ((i + 1)%2  * scalars[0])
-           + (((i >> 1)^1) * scalars[1])
-           + (cntl2[i * 8 + 4] & scalars[2])
-           + (cntl3[i * 8 + 4] & scalars[3]);
-         target[i* 8 + 5] = src0[((char)permuters[i][5 * 2])/2]
-           + ((i + 1)%2  * scalars[0])
-           + (((i >> 1)^1) * scalars[1])
-           + (cntl2[i * 8 + 5] & scalars[2])
-           + (cntl3[i * 8 + 5] & scalars[3]);
-         target[i* 8 + 6] = src0[((char)permuters[i][6 * 2])/2]
-           + ((i + 1)%2  * scalars[0])
-           + (((i >> 1)^1) * scalars[1])
-           + (cntl2[i * 8 + 6] & scalars[2])
-           + (cntl3[i * 8 + 6] & scalars[3]);
-         target[i* 8 + 7] = src0[((char)permuters[i][7 * 2])/2]
-           + ((i + 1)%2  * scalars[0])
-           + (((i >> 1)^1) * scalars[1])
-           + (cntl2[i * 8 + 7] & scalars[2])
-           + (cntl3[i * 8 + 7] & scalars[3]);
-
-       }
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#endif /*INCLUDED_volk_16i_branch_4_state_8_a_H*/
diff --git a/volk/kernels/volk/volk_16i_convert_8i.h 
b/volk/kernels/volk/volk_16i_convert_8i.h
deleted file mode 100644
index 6f16fa4..0000000
--- a/volk/kernels/volk/volk_16i_convert_8i.h
+++ /dev/null
@@ -1,199 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_convert_8i_u_H
-#define INCLUDED_volk_16i_convert_8i_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Converts the input 16 bit integer data into 8 bit integer data
-  \param inputVector The 16 bit input data buffer
-  \param outputVector The 8 bit output data buffer
-  \param num_points The number of data values to be converted
-  \note Input and output buffers do NOT need to be properly aligned
-*/
-static inline void volk_16i_convert_8i_u_sse2(int8_t* outputVector, const 
int16_t* inputVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int sixteenthPoints = num_points / 16;
-
-     int8_t* outputVectorPtr = outputVector;
-    int16_t* inputPtr = (int16_t*)inputVector;
-    __m128i inputVal1;
-    __m128i inputVal2;
-    __m128i ret;
-
-    for(;number < sixteenthPoints; number++){
-
-      // Load the 16 values
-      inputVal1 = _mm_loadu_si128((__m128i*)inputPtr); inputPtr += 8;
-      inputVal2 = _mm_loadu_si128((__m128i*)inputPtr); inputPtr += 8;
-
-      inputVal1 = _mm_srai_epi16(inputVal1, 8);
-      inputVal2 = _mm_srai_epi16(inputVal2, 8);
-
-      ret = _mm_packs_epi16(inputVal1, inputVal2);
-
-      _mm_storeu_si128((__m128i*)outputVectorPtr, ret);
-
-      outputVectorPtr += 16;
-    }
-
-    number = sixteenthPoints * 16;
-    for(; number < num_points; number++){
-      outputVector[number] =(int8_t)(inputVector[number] >> 8);
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Converts the input 16 bit integer data into 8 bit integer data
-  \param inputVector The 16 bit input data buffer
-  \param outputVector The 8 bit output data buffer
-  \param num_points The number of data values to be converted
-  \note Input and output buffers do NOT need to be properly aligned
-*/
-static inline void volk_16i_convert_8i_generic(int8_t* outputVector, const 
int16_t* inputVector, unsigned int num_points){
-  int8_t* outputVectorPtr = outputVector;
-  const int16_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++  >> 8));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_16i_convert_8i_u_H */
-#ifndef INCLUDED_volk_16i_convert_8i_a_H
-#define INCLUDED_volk_16i_convert_8i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Converts the input 16 bit integer data into 8 bit integer data
-  \param inputVector The 16 bit input data buffer
-  \param outputVector The 8 bit output data buffer
-  \param num_points The number of data values to be converted
-*/
-static inline void volk_16i_convert_8i_a_sse2(int8_t* outputVector, const 
int16_t* inputVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int sixteenthPoints = num_points / 16;
-
-     int8_t* outputVectorPtr = outputVector;
-    int16_t* inputPtr = (int16_t*)inputVector;
-    __m128i inputVal1;
-    __m128i inputVal2;
-    __m128i ret;
-
-    for(;number < sixteenthPoints; number++){
-
-      // Load the 16 values
-      inputVal1 = _mm_load_si128((__m128i*)inputPtr); inputPtr += 8;
-      inputVal2 = _mm_load_si128((__m128i*)inputPtr); inputPtr += 8;
-
-      inputVal1 = _mm_srai_epi16(inputVal1, 8);
-      inputVal2 = _mm_srai_epi16(inputVal2, 8);
-
-      ret = _mm_packs_epi16(inputVal1, inputVal2);
-
-      _mm_store_si128((__m128i*)outputVectorPtr, ret);
-
-      outputVectorPtr += 16;
-    }
-
-    number = sixteenthPoints * 16;
-    for(; number < num_points; number++){
-      outputVector[number] =(int8_t)(inputVector[number] >> 8);
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Converts the input 16 bit integer data into 8 bit integer data
-  \param inputVector The 16 bit input data buffer
-  \param outputVector The 8 bit output data buffer
-  \param num_points The number of data values to be converted
-*/
-static inline void volk_16i_convert_8i_neon(int8_t* outputVector, const 
int16_t* inputVector, unsigned int num_points){
-  int8_t* outputVectorPtr = outputVector;
-  const int16_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  unsigned int sixteenth_points = num_points / 16;
-
-  int16x8_t inputVal0;
-  int16x8_t inputVal1;
-  int8x8_t outputVal0;
-  int8x8_t outputVal1;
-  int8x16_t outputVal;
-  
-  for(number = 0; number < sixteenth_points; number++){
-    // load two input vectors
-    inputVal0 = vld1q_s16(inputVectorPtr);
-    inputVal1 = vld1q_s16(inputVectorPtr+8);
-    // shift right
-    outputVal0 = vshrn_n_s16(inputVal0, 8);
-    outputVal1 = vshrn_n_s16(inputVal1, 8);
-    // squash two vectors and write output
-    outputVal = vcombine_s8(outputVal0, outputVal1);
-    vst1q_s8(outputVectorPtr, outputVal);
-    inputVectorPtr += 16;
-    outputVectorPtr += 16;
-  }
-
-  for(number = sixteenth_points * 16; number < num_points; number++){
-    *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Converts the input 16 bit integer data into 8 bit integer data
-  \param inputVector The 16 bit input data buffer
-  \param outputVector The 8 bit output data buffer
-  \param num_points The number of data values to be converted
-*/
-static inline void volk_16i_convert_8i_a_generic(int8_t* outputVector, const 
int16_t* inputVector, unsigned int num_points){
-  int8_t* outputVectorPtr = outputVector;
-  const int16_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int8_t)(*inputVectorPtr++ >> 8));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_16i_convert_8i_a_H */
diff --git a/volk/kernels/volk/volk_16i_max_star_16i.h 
b/volk/kernels/volk/volk_16i_max_star_16i.h
deleted file mode 100644
index e81a91a..0000000
--- a/volk/kernels/volk/volk_16i_max_star_16i.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_max_star_16i_a_H
-#define INCLUDED_volk_16i_max_star_16i_a_H
-
-
-#include<inttypes.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_SSSE3
-
-#include<xmmintrin.h>
-#include<emmintrin.h>
-#include<tmmintrin.h>
-
-static inline  void volk_16i_max_star_16i_a_ssse3(short* target, short* src0, 
unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*2;
-
-  short candidate = src0[0];
-  short cands[8];
-  __m128i xmm0, xmm1, xmm3, xmm4, xmm5, xmm6;
-
-
-  __m128i *p_src0;
-
-  p_src0 = (__m128i*)src0;
-
-  int bound = num_bytes >> 4;
-  int leftovers = (num_bytes >> 1) & 7;
-
-  int i = 0;
-
-
-  xmm1 = _mm_setzero_si128();
-  xmm0 = _mm_setzero_si128();
-  //_mm_insert_epi16(xmm0, candidate, 0);
-
-  xmm0 = _mm_shuffle_epi8(xmm0, xmm1);
-
-
-  for(i = 0; i < bound; ++i) {
-    xmm1 = _mm_load_si128(p_src0);
-    p_src0 += 1;
-    //xmm2 = _mm_sub_epi16(xmm1, xmm0);
-
-
-
-
-
-
-    xmm3 = _mm_cmpgt_epi16(xmm0, xmm1);
-    xmm4 = _mm_cmpeq_epi16(xmm0, xmm1);
-    xmm5 = _mm_cmpgt_epi16(xmm1, xmm0);
-
-    xmm6 = _mm_xor_si128(xmm4, xmm5);
-
-    xmm3 = _mm_and_si128(xmm3, xmm0);
-    xmm4 = _mm_and_si128(xmm6, xmm1);
-
-    xmm0 = _mm_add_epi16(xmm3, xmm4);
-
-
-  }
-
-  _mm_store_si128((__m128i*)cands, xmm0);
-
-  for(i = 0; i < 8; ++i) {
-    candidate = ((short)(candidate - cands[i]) > 0) ? candidate : cands[i];
-  }
-
-
-
-  for(i = 0; i < leftovers; ++i) {
-
-    candidate = ((short)(candidate - src0[(bound << 3) + i]) > 0) ? candidate 
: src0[(bound << 3) + i];
-  }
-
-  target[0] = candidate;
-
-
-
-
-
-}
-
-#endif /*LV_HAVE_SSSE3*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_16i_max_star_16i_neon(short* target, short* src0, 
unsigned int num_points) {
-    const unsigned int eighth_points = num_points / 8;
-    unsigned number;
-    int16x8_t input_vec;
-    int16x8_t diff, zeros;
-    uint16x8_t comp1, comp2;
-    zeros = veorq_s16(zeros, zeros);
-
-    int16x8x2_t tmpvec;
-
-    int16x8_t candidate_vec = vld1q_dup_s16(src0 );
-    short candidate;
-    ++src0;
-
-    for(number=0; number < eighth_points; ++number) {
-        input_vec = vld1q_s16(src0);
-        __builtin_prefetch(src0+16);
-        diff = vsubq_s16(candidate_vec, input_vec);
-        comp1 = vcgeq_s16(diff, zeros);
-        comp2 = vcltq_s16(diff, zeros);
-
-        tmpvec.val[0] = vandq_s16(candidate_vec, (int16x8_t)comp1);
-        tmpvec.val[1] = vandq_s16(input_vec, (int16x8_t)comp2);
-
-        candidate_vec = vaddq_s16(tmpvec.val[0], tmpvec.val[1]);
-        src0 += 8;
-    }
-        vst1q_s16(&candidate, candidate_vec);
-
-    for(number=0; number < num_points%8; number++) {
-        candidate = ((int16_t)(candidate - src0[number]) > 0) ? candidate : 
src0[number];
-    }
-    target[0] = candidate;
-}
-#endif /*LV_HAVE_NEON*/
-
-#ifdef LV_HAVE_GENERIC
-
-static inline void volk_16i_max_star_16i_generic(short* target, short* src0, 
unsigned int num_points) {
-
-       const unsigned int num_bytes = num_points*2;
-
-       int i = 0;
-
-       int bound = num_bytes >> 1;
-
-       short candidate = src0[0];
-       for(i = 1; i < bound; ++i) {
-         candidate = ((short)(candidate - src0[i]) > 0) ? candidate : src0[i];
-       }
-       target[0] = candidate;
-
-}
-
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#endif /*INCLUDED_volk_16i_max_star_16i_a_H*/
diff --git a/volk/kernels/volk/volk_16i_max_star_horizontal_16i.h 
b/volk/kernels/volk/volk_16i_max_star_horizontal_16i.h
deleted file mode 100644
index b671929..0000000
--- a/volk/kernels/volk/volk_16i_max_star_horizontal_16i.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H
-#define INCLUDED_volk_16i_max_star_horizontal_16i_a_H
-
-#include <volk/volk_common.h>
-
-#include<inttypes.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_SSSE3
-
-#include<xmmintrin.h>
-#include<emmintrin.h>
-#include<tmmintrin.h>
-
-static inline  void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, 
int16_t* src0, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*2;
-
-  const static uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 
0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
-  const static uint8_t shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d};
-  const static uint8_t andmask0[16] = {0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 
0x02,0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
-  const static uint8_t andmask1[16] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02};
-
-
-
-  __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
-  __m128i  xmm5, xmm6, xmm7, xmm8;
-
-  xmm4 = _mm_load_si128((__m128i*)shufmask0);
-  xmm5 = _mm_load_si128((__m128i*)shufmask1);
-  xmm6 = _mm_load_si128((__m128i*)andmask0);
-  xmm7 = _mm_load_si128((__m128i*)andmask1);
-
-  __m128i *p_target, *p_src0;
-
-  p_target = (__m128i*)target;
-  p_src0 = (__m128i*)src0;
-
-  int bound = num_bytes >> 5;
-  int intermediate = (num_bytes >> 4) & 1;
-  int leftovers = (num_bytes >> 1) & 7;
-
-  int i = 0;
-
-
-  for(i = 0; i < bound; ++i) {
-
-    xmm0 = _mm_load_si128(p_src0);
-    xmm1 = _mm_load_si128(&p_src0[1]);
-
-
-
-    xmm2 = _mm_xor_si128(xmm2, xmm2);
-    p_src0 += 2;
-
-    xmm3 = _mm_hsub_epi16(xmm0, xmm1);
-
-    xmm2 = _mm_cmpgt_epi16(xmm2, xmm3);
-
-    xmm8 = _mm_and_si128(xmm2, xmm6);
-    xmm3 = _mm_and_si128(xmm2, xmm7);
-
-
-    xmm8 = _mm_add_epi8(xmm8, xmm4);
-    xmm3 = _mm_add_epi8(xmm3, xmm5);
-
-    xmm0 = _mm_shuffle_epi8(xmm0, xmm8);
-    xmm1 = _mm_shuffle_epi8(xmm1, xmm3);
-
-
-    xmm3 = _mm_add_epi16(xmm0, xmm1);
-
-
-    _mm_store_si128(p_target, xmm3);
-
-    p_target += 1;
-
-  }
-
-  for(i = 0; i < intermediate; ++i) {
-
-    xmm0 = _mm_load_si128(p_src0);
-
-
-    xmm2 = _mm_xor_si128(xmm2, xmm2);
-    p_src0 += 1;
-
-    xmm3 = _mm_hsub_epi16(xmm0, xmm1);
-    xmm2 = _mm_cmpgt_epi16(xmm2, xmm3);
-
-    xmm8 = _mm_and_si128(xmm2, xmm6);
-
-    xmm3 = _mm_add_epi8(xmm8, xmm4);
-
-    xmm0 = _mm_shuffle_epi8(xmm0, xmm3);
-
-    _mm_storel_pd((double*)p_target, bit128_p(&xmm0)->double_vec);
-
-    p_target = (__m128i*)((int8_t*)p_target + 8);
-
-  }
-
-  for(i = (bound << 4) + (intermediate << 3); i < (bound << 4) + (intermediate 
<< 3) + leftovers ; i += 2) {
-    target[i>>1] = ((int16_t)(src0[i] - src0[i + 1]) > 0) ? src0[i] : src0[i + 
1];
-  }
-
-
-}
-
-#endif /*LV_HAVE_SSSE3*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_16i_max_star_horizontal_16i_neon(int16_t* target, 
int16_t* src0, unsigned int num_points) {
-    const unsigned int eighth_points = num_points / 16;
-    unsigned number;
-    int16x8x2_t input_vec;
-    int16x8_t diff, max_vec, zeros;
-    uint16x8_t comp1, comp2;
-    zeros = veorq_s16(zeros, zeros);
-    for(number=0; number < eighth_points; ++number) {
-        input_vec = vld2q_s16(src0);
-        //__builtin_prefetch(src0+16);
-        diff = vsubq_s16(input_vec.val[0], input_vec.val[1]);
-        comp1 = vcgeq_s16(diff, zeros);
-        comp2 = vcltq_s16(diff, zeros);
-
-        input_vec.val[0] = vandq_s16(input_vec.val[0], (int16x8_t)comp1);
-        input_vec.val[1] = vandq_s16(input_vec.val[1], (int16x8_t)comp2);
-
-        max_vec = vaddq_s16(input_vec.val[0], input_vec.val[1]);
-        vst1q_s16(target, max_vec);
-        src0 += 16;
-        target += 8;
-    }
-    for(number=0; number < num_points%16; number+=2) {
-        target[number >> 1] = ((int16_t)(src0[number] - src0[number + 1]) > 0) 
? src0[number] : src0[number+1];
-    }
-
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_NEON
-extern void volk_16i_max_star_horizontal_16i_neonasm(int16_t* target, int16_t* 
src0, unsigned int num_points);
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_16i_max_star_horizontal_16i_generic(int16_t* target, 
int16_t* src0, unsigned int num_points) {
-
-       const unsigned int num_bytes = num_points*2;
-
-       int i = 0;
-
-       int bound = num_bytes >> 1;
-
-
-       for(i = 0; i < bound; i += 2) {
-         target[i >> 1] = ((int16_t) (src0[i] - src0[i + 1]) > 0) ? src0[i] : 
src0[i+1];
-       }
-
-}
-
-
-
-#endif /*LV_HAVE_GENERIC*/
-
-#endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a_H*/
diff --git a/volk/kernels/volk/volk_16i_permute_and_scalar_add.h 
b/volk/kernels/volk/volk_16i_permute_and_scalar_add.h
deleted file mode 100644
index e6f20b7..0000000
--- a/volk/kernels/volk/volk_16i_permute_and_scalar_add.h
+++ /dev/null
@@ -1,164 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a_H
-#define INCLUDED_volk_16i_permute_and_scalar_add_a_H
-
-
-#include<inttypes.h>
-#include<stdio.h>
-
-
-
-
-#ifdef LV_HAVE_SSE2
-
-#include<xmmintrin.h>
-#include<emmintrin.h>
-
-static inline  void volk_16i_permute_and_scalar_add_a_sse2(short* target,  
short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, 
short* cntl3, short* scalars, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*2;
-
-  __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
-
-  __m128i *p_target, *p_cntl0, *p_cntl1, *p_cntl2, *p_cntl3, *p_scalars;
-
-  short* p_permute_indexes = permute_indexes;
-
-  p_target = (__m128i*)target;
-  p_cntl0 = (__m128i*)cntl0;
-  p_cntl1 = (__m128i*)cntl1;
-  p_cntl2 = (__m128i*)cntl2;
-  p_cntl3 = (__m128i*)cntl3;
-  p_scalars = (__m128i*)scalars;
-
-  int i = 0;
-
-  int bound = (num_bytes >> 4);
-  int leftovers = (num_bytes >> 1) & 7;
-
-  xmm0 = _mm_load_si128(p_scalars);
-
-  xmm1 = _mm_shufflelo_epi16(xmm0, 0);
-  xmm2 = _mm_shufflelo_epi16(xmm0, 0x55);
-  xmm3 = _mm_shufflelo_epi16(xmm0, 0xaa);
-  xmm4 = _mm_shufflelo_epi16(xmm0, 0xff);
-
-  xmm1 = _mm_shuffle_epi32(xmm1, 0x00);
-  xmm2 = _mm_shuffle_epi32(xmm2, 0x00);
-  xmm3 = _mm_shuffle_epi32(xmm3, 0x00);
-  xmm4 = _mm_shuffle_epi32(xmm4, 0x00);
-
-
-  for(; i < bound; ++i) {
-    xmm0 = _mm_setzero_si128();
-    xmm5 = _mm_setzero_si128();
-    xmm6 = _mm_setzero_si128();
-    xmm7 = _mm_setzero_si128();
-
-    xmm0 = _mm_insert_epi16(xmm0, src0[p_permute_indexes[0]], 0);
-    xmm5 = _mm_insert_epi16(xmm5, src0[p_permute_indexes[1]], 1);
-    xmm6 = _mm_insert_epi16(xmm6, src0[p_permute_indexes[2]], 2);
-    xmm7 = _mm_insert_epi16(xmm7, src0[p_permute_indexes[3]], 3);
-    xmm0 = _mm_insert_epi16(xmm0, src0[p_permute_indexes[4]], 4);
-    xmm5 = _mm_insert_epi16(xmm5, src0[p_permute_indexes[5]], 5);
-    xmm6 = _mm_insert_epi16(xmm6, src0[p_permute_indexes[6]], 6);
-    xmm7 = _mm_insert_epi16(xmm7, src0[p_permute_indexes[7]], 7);
-
-    xmm0 = _mm_add_epi16(xmm0, xmm5);
-    xmm6 = _mm_add_epi16(xmm6, xmm7);
-
-    p_permute_indexes += 8;
-
-    xmm0 = _mm_add_epi16(xmm0, xmm6);
-
-    xmm5 = _mm_load_si128(p_cntl0);
-    xmm6 = _mm_load_si128(p_cntl1);
-    xmm7 = _mm_load_si128(p_cntl2);
-
-    xmm5 = _mm_and_si128(xmm5, xmm1);
-    xmm6 = _mm_and_si128(xmm6, xmm2);
-    xmm7 = _mm_and_si128(xmm7, xmm3);
-
-    xmm0 = _mm_add_epi16(xmm0, xmm5);
-
-    xmm5 = _mm_load_si128(p_cntl3);
-
-    xmm6 = _mm_add_epi16(xmm6, xmm7);
-
-    p_cntl0 += 1;
-
-    xmm5 = _mm_and_si128(xmm5, xmm4);
-
-    xmm0 = _mm_add_epi16(xmm0, xmm6);
-
-    p_cntl1 += 1;
-    p_cntl2 += 1;
-
-    xmm0 = _mm_add_epi16(xmm0, xmm5);
-
-    p_cntl3 += 1;
-
-    _mm_store_si128(p_target, xmm0);
-
-    p_target += 1;
-  }
-
-
-
-
-
-  for(i = bound * 8; i < (bound * 8) + leftovers; ++i) {
-    target[i] = src0[permute_indexes[i]]
-      + (cntl0[i] & scalars[0])
-      + (cntl1[i] & scalars[1])
-      + (cntl2[i] & scalars[2])
-      + (cntl3[i] & scalars[3]);
-  }
-}
-#endif /*LV_HAVE_SSEs*/
-
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_16i_permute_and_scalar_add_generic(short* target, 
short* src0, short* permute_indexes, short* cntl0, short* cntl1, short* cntl2, 
short* cntl3, short* scalars, unsigned int num_points) {
-
-       const unsigned int num_bytes = num_points*2;
-
-       int i = 0;
-
-       int bound = num_bytes >> 1;
-
-       for(i = 0; i < bound; ++i) {
-               target[i] = src0[permute_indexes[i]]
-                       + (cntl0[i] & scalars[0])
-                       + (cntl1[i] & scalars[1])
-                       + (cntl2[i] & scalars[2])
-                       + (cntl3[i] & scalars[3]);
-
-       }
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#endif /*INCLUDED_volk_16i_permute_and_scalar_add_a_H*/
diff --git a/volk/kernels/volk/volk_16i_s32f_convert_32f.h 
b/volk/kernels/volk/volk_16i_s32f_convert_32f.h
deleted file mode 100644
index 6ea28f0..0000000
--- a/volk/kernels/volk/volk_16i_s32f_convert_32f.h
+++ /dev/null
@@ -1,423 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_s32f_convert_32f_u_H
-#define INCLUDED_volk_16i_s32f_convert_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_16i_s32f_convert_32f_u_avx(float* outputVector, const 
int16_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-     float* outputVectorPtr = outputVector;
-    __m128 invScalar = _mm_set_ps1(1.0/scalar);
-    int16_t* inputPtr = (int16_t*)inputVector;
-    __m128i inputVal, inputVal2;
-    __m128 ret;
-    __m256 output;
-    __m256 dummy = _mm256_setzero_ps();
-
-    for(;number < eighthPoints; number++){
-
-      // Load the 8 values
-      //inputVal = _mm_loadu_si128((__m128i*)inputPtr);
-      inputVal = _mm_loadu_si128((__m128i*)inputPtr);
-
-      // Shift the input data to the right by 64 bits ( 8 bytes )
-      inputVal2 = _mm_srli_si128(inputVal, 8);
-
-      // Convert the lower 4 values into 32 bit words
-      inputVal = _mm_cvtepi16_epi32(inputVal);
-      inputVal2 = _mm_cvtepi16_epi32(inputVal2);
-
-      ret = _mm_cvtepi32_ps(inputVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      output = _mm256_insertf128_ps(dummy, ret, 0);
-
-      ret = _mm_cvtepi32_ps(inputVal2);
-      ret = _mm_mul_ps(ret, invScalar);
-      output = _mm256_insertf128_ps(output, ret, 1);
-
-      _mm256_storeu_ps(outputVectorPtr, output);
-
-      outputVectorPtr += 8;
-
-      inputPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(; number < num_points; number++){
-      outputVector[number] =((float)(inputVector[number])) / scalar;
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-    \note Output buffer does NOT need to be properly aligned
-  */
-static inline void volk_16i_s32f_convert_32f_u_sse4_1(float* outputVector, 
const int16_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-     float* outputVectorPtr = outputVector;
-    __m128 invScalar = _mm_set_ps1(1.0/scalar);
-    int16_t* inputPtr = (int16_t*)inputVector;
-    __m128i inputVal;
-    __m128i inputVal2;
-    __m128 ret;
-
-    for(;number < eighthPoints; number++){
-
-      // Load the 8 values
-      inputVal = _mm_loadu_si128((__m128i*)inputPtr);
-
-      // Shift the input data to the right by 64 bits ( 8 bytes )
-      inputVal2 = _mm_srli_si128(inputVal, 8);
-
-      // Convert the lower 4 values into 32 bit words
-      inputVal = _mm_cvtepi16_epi32(inputVal);
-      inputVal2 = _mm_cvtepi16_epi32(inputVal2);
-
-      ret = _mm_cvtepi32_ps(inputVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      ret = _mm_cvtepi32_ps(inputVal2);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-
-      outputVectorPtr += 4;
-
-      inputPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(; number < num_points; number++){
-      outputVector[number] =((float)(inputVector[number])) / scalar;
-    }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-    \note Output buffer does NOT need to be properly aligned
-  */
-static inline void volk_16i_s32f_convert_32f_u_sse(float* outputVector, const 
int16_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* outputVectorPtr = outputVector;
-    __m128 invScalar = _mm_set_ps1(1.0/scalar);
-    int16_t* inputPtr = (int16_t*)inputVector;
-    __m128 ret;
-
-    for(;number < quarterPoints; number++){
-      ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), 
(float)(inputPtr[1]), (float)(inputPtr[0]));
-
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-
-      inputPtr += 4;
-      outputVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      outputVector[number] = (float)(inputVector[number]) / scalar;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-    \note Output buffer does NOT need to be properly aligned
-  */
-static inline void volk_16i_s32f_convert_32f_generic(float* outputVector, 
const int16_t* inputVector, const float scalar, unsigned int num_points){
-  float* outputVectorPtr = outputVector;
-  const int16_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-    \note Output buffer does NOT need to be properly aligned
-  */
-static inline void volk_16i_s32f_convert_32f_neon(float* outputVector, const 
int16_t* inputVector, const float scalar, unsigned int num_points){
-  float* outputPtr = outputVector;
-  const int16_t* inputPtr = inputVector;
-  unsigned int number = 0;
-  unsigned int eighth_points = num_points / 8;
-
-  int16x4x2_t input16;
-  int32x4_t input32_0, input32_1;
-  float32x4_t input_float_0, input_float_1;
-  float32x4x2_t output_float;
-  float32x4_t inv_scale;
-
-  inv_scale = vdupq_n_f32(1.0/scalar);
-
-  // the generic disassembles to a 128-bit load
-  // and duplicates every instruction to operate on 64-bits
-  // at a time. This is only possible with lanes, which is faster
-  // than just doing a vld1_s16, but still slower.
-  for(number = 0; number < eighth_points; number++){
-    input16 = vld2_s16(inputPtr);
-    // widen 16-bit int to 32-bit int
-    input32_0 = vmovl_s16(input16.val[0]);
-    input32_1 = vmovl_s16(input16.val[1]);
-    // convert 32-bit int to float with scale
-    input_float_0 = vcvtq_f32_s32(input32_0);
-    input_float_1 = vcvtq_f32_s32(input32_1);
-    output_float.val[0] = vmulq_f32(input_float_0, inv_scale);
-    output_float.val[1] = vmulq_f32(input_float_1, inv_scale);
-    vst2q_f32(outputPtr, output_float);
-    inputPtr += 8;
-    outputPtr += 8;
-  }
-
-  for(number = eighth_points*8; number < num_points; number++){
-    *outputPtr++ = ((float)(*inputPtr++)) / scalar;
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-
-#endif /* INCLUDED_volk_16i_s32f_convert_32f_u_H */
-#ifndef INCLUDED_volk_16i_s32f_convert_32f_a_H
-#define INCLUDED_volk_16i_s32f_convert_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_16i_s32f_convert_32f_a_avx(float* outputVector, const 
int16_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-     float* outputVectorPtr = outputVector;
-    __m128 invScalar = _mm_set_ps1(1.0/scalar);
-    int16_t* inputPtr = (int16_t*)inputVector;
-    __m128i inputVal, inputVal2;
-    __m128 ret;
-    __m256 output;
-    __m256 dummy = _mm256_setzero_ps();
-
-    for(;number < eighthPoints; number++){
-
-      // Load the 8 values
-      //inputVal = _mm_loadu_si128((__m128i*)inputPtr);
-      inputVal = _mm_load_si128((__m128i*)inputPtr);
-
-      // Shift the input data to the right by 64 bits ( 8 bytes )
-      inputVal2 = _mm_srli_si128(inputVal, 8);
-
-      // Convert the lower 4 values into 32 bit words
-      inputVal = _mm_cvtepi16_epi32(inputVal);
-      inputVal2 = _mm_cvtepi16_epi32(inputVal2);
-
-      ret = _mm_cvtepi32_ps(inputVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      output = _mm256_insertf128_ps(dummy, ret, 0);
-
-      ret = _mm_cvtepi32_ps(inputVal2);
-      ret = _mm_mul_ps(ret, invScalar);
-      output = _mm256_insertf128_ps(output, ret, 1);
-
-      _mm256_store_ps(outputVectorPtr, output);
-
-      outputVectorPtr += 8;
-
-      inputPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(; number < num_points; number++){
-      outputVector[number] =((float)(inputVector[number])) / scalar;
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_16i_s32f_convert_32f_a_sse4_1(float* outputVector, 
const int16_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-     float* outputVectorPtr = outputVector;
-    __m128 invScalar = _mm_set_ps1(1.0/scalar);
-    int16_t* inputPtr = (int16_t*)inputVector;
-    __m128i inputVal;
-    __m128i inputVal2;
-    __m128 ret;
-
-    for(;number < eighthPoints; number++){
-
-      // Load the 8 values
-      inputVal = _mm_loadu_si128((__m128i*)inputPtr);
-
-      // Shift the input data to the right by 64 bits ( 8 bytes )
-      inputVal2 = _mm_srli_si128(inputVal, 8);
-
-      // Convert the lower 4 values into 32 bit words
-      inputVal = _mm_cvtepi16_epi32(inputVal);
-      inputVal2 = _mm_cvtepi16_epi32(inputVal2);
-
-      ret = _mm_cvtepi32_ps(inputVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      ret = _mm_cvtepi32_ps(inputVal2);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-
-      outputVectorPtr += 4;
-
-      inputPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(; number < num_points; number++){
-      outputVector[number] =((float)(inputVector[number])) / scalar;
-    }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_16i_s32f_convert_32f_a_sse(float* outputVector, const 
int16_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* outputVectorPtr = outputVector;
-    __m128 invScalar = _mm_set_ps1(1.0/scalar);
-    int16_t* inputPtr = (int16_t*)inputVector;
-    __m128 ret;
-
-    for(;number < quarterPoints; number++){
-      ret = _mm_set_ps((float)(inputPtr[3]), (float)(inputPtr[2]), 
(float)(inputPtr[1]), (float)(inputPtr[0]));
-
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-
-      inputPtr += 4;
-      outputVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      outputVector[number] = (float)(inputVector[number]) / scalar;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the input 16 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 16 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_16i_s32f_convert_32f_a_generic(float* outputVector, 
const int16_t* inputVector, const float scalar, unsigned int num_points){
-  float* outputVectorPtr = outputVector;
-  const int16_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) / scalar;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_16i_s32f_convert_32f_a_H */
diff --git a/volk/kernels/volk/volk_16i_x4_quad_max_star_16i.h 
b/volk/kernels/volk/volk_16i_x4_quad_max_star_16i.h
deleted file mode 100644
index 4c67ef4..0000000
--- a/volk/kernels/volk/volk_16i_x4_quad_max_star_16i.h
+++ /dev/null
@@ -1,274 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_x4_quad_max_star_16i_a_H
-#define INCLUDED_volk_16i_x4_quad_max_star_16i_a_H
-
-
-#include<inttypes.h>
-#include<stdio.h>
-
-
-
-
-
-#ifdef LV_HAVE_SSE2
-
-#include<emmintrin.h>
-
-static inline  void volk_16i_x4_quad_max_star_16i_a_sse2(short* target, short* 
src0, short* src1, short* src2, short* src3, unsigned int num_points) {
-
-       const unsigned int num_bytes = num_points*2;
-
-       int i = 0;
-
-       int bound = (num_bytes >> 4);
-       int bound_copy = bound;
-       int leftovers = (num_bytes >> 1) & 7;
-
-       __m128i *p_target, *p_src0, *p_src1, *p_src2, *p_src3;
-       p_target = (__m128i*) target;
-       p_src0 =  (__m128i*)src0;
-       p_src1 =  (__m128i*)src1;
-       p_src2 =  (__m128i*)src2;
-       p_src3 =  (__m128i*)src3;
-
-
-
-       __m128i xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
-
-       while(bound_copy > 0) {
-
-         xmm1 = _mm_load_si128(p_src0);
-         xmm2 = _mm_load_si128(p_src1);
-         xmm3 = _mm_load_si128(p_src2);
-         xmm4 = _mm_load_si128(p_src3);
-
-         xmm5 = _mm_setzero_si128();
-         xmm6 = _mm_setzero_si128();
-         xmm7 = xmm1;
-         xmm8 = xmm3;
-
-
-         xmm1 = _mm_sub_epi16(xmm2, xmm1);
-
-
-
-         xmm3 = _mm_sub_epi16(xmm4, xmm3);
-
-         xmm5 = _mm_cmpgt_epi16(xmm1, xmm5);
-         xmm6 = _mm_cmpgt_epi16(xmm3, xmm6);
-
-
-
-         xmm2 = _mm_and_si128(xmm5, xmm2);
-         xmm4 = _mm_and_si128(xmm6, xmm4);
-         xmm5 = _mm_andnot_si128(xmm5, xmm7);
-         xmm6 = _mm_andnot_si128(xmm6, xmm8);
-
-         xmm5 = _mm_add_epi16(xmm2, xmm5);
-         xmm6 = _mm_add_epi16(xmm4, xmm6);
-
-
-         xmm1 = _mm_xor_si128(xmm1, xmm1);
-         xmm2 = xmm5;
-         xmm5 = _mm_sub_epi16(xmm6, xmm5);
-         p_src0 += 1;
-         bound_copy -= 1;
-
-         xmm1 = _mm_cmpgt_epi16(xmm5, xmm1);
-         p_src1 += 1;
-
-         xmm6 = _mm_and_si128(xmm1, xmm6);
-
-         xmm1 = _mm_andnot_si128(xmm1, xmm2);
-         p_src2 += 1;
-
-
-
-         xmm1 = _mm_add_epi16(xmm6, xmm1);
-         p_src3 += 1;
-
-
-         _mm_store_si128(p_target, xmm1);
-         p_target += 1;
-
-       }
-
-
-       /*asm volatile
-               (
-                "volk_16i_x4_quad_max_star_16i_a_sse2_L1:\n\t"
-                "cmp $0, %[bound]\n\t"
-                "je volk_16i_x4_quad_max_star_16i_a_sse2_END\n\t"
-
-                "movaps (%[src0]), %%xmm1\n\t"
-                "movaps (%[src1]), %%xmm2\n\t"
-                "movaps (%[src2]), %%xmm3\n\t"
-                "movaps (%[src3]), %%xmm4\n\t"
-
-                "pxor %%xmm5, %%xmm5\n\t"
-                "pxor %%xmm6, %%xmm6\n\t"
-                "movaps %%xmm1, %%xmm7\n\t"
-                "movaps %%xmm3, %%xmm8\n\t"
-                "psubw %%xmm2, %%xmm1\n\t"
-                "psubw %%xmm4, %%xmm3\n\t"
-
-                "pcmpgtw %%xmm1, %%xmm5\n\t"
-                "pcmpgtw %%xmm3, %%xmm6\n\t"
-
-                "pand %%xmm5, %%xmm2\n\t"
-                "pand %%xmm6, %%xmm4\n\t"
-                "pandn %%xmm7, %%xmm5\n\t"
-                "pandn %%xmm8, %%xmm6\n\t"
-
-                "paddw %%xmm2, %%xmm5\n\t"
-                "paddw %%xmm4, %%xmm6\n\t"
-
-                "pxor %%xmm1, %%xmm1\n\t"
-                "movaps %%xmm5, %%xmm2\n\t"
-
-                "psubw %%xmm6, %%xmm5\n\t"
-                "add $16, %[src0]\n\t"
-                "add $-1, %[bound]\n\t"
-
-                "pcmpgtw %%xmm5, %%xmm1\n\t"
-                "add $16, %[src1]\n\t"
-
-                "pand %%xmm1, %%xmm6\n\t"
-
-                "pandn %%xmm2, %%xmm1\n\t"
-                "add $16, %[src2]\n\t"
-
-                "paddw %%xmm6, %%xmm1\n\t"
-                "add $16, %[src3]\n\t"
-
-                "movaps %%xmm1, (%[target])\n\t"
-                "addw $16, %[target]\n\t"
-                "jmp volk_16i_x4_quad_max_star_16i_a_sse2_L1\n\t"
-
-                "volk_16i_x4_quad_max_star_16i_a_sse2_END:\n\t"
-                :
-                :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), 
[src2]"r"(src2), [src3]"r"(src3), [target]"r"(target)
-                :
-                );
-       */
-
-       short temp0 = 0;
-       short temp1 = 0;
-       for(i = bound * 8; i < (bound * 8) + leftovers; ++i) {
-         temp0 = ((short)(src0[i] - src1[i]) > 0) ? src0[i] : src1[i];
-         temp1 = ((short)(src2[i] - src3[i])>0) ? src2[i] : src3[i];
-         target[i] = ((short)(temp0 - temp1)>0) ? temp0 : temp1;
-       }
-       return;
-
-
-}
-
-#endif /*LV_HAVE_SSE2*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_16i_x4_quad_max_star_16i_neon(short* target, short* 
src0, short* src1, short* src2, short* src3, unsigned int num_points) {
-    const unsigned int eighth_points = num_points / 8;
-    unsigned i;
-
-    int16x8_t src0_vec, src1_vec, src2_vec, src3_vec;
-    int16x8_t diff12, diff34;
-    int16x8_t comp0, comp1, comp2, comp3;
-    int16x8_t result1_vec, result2_vec;
-    int16x8_t zeros;
-    zeros = veorq_s16(zeros, zeros);
-    for(i=0; i < eighth_points; ++i) {
-        src0_vec = vld1q_s16(src0);
-        src1_vec = vld1q_s16(src1);
-        src2_vec = vld1q_s16(src2);
-        src3_vec = vld1q_s16(src3);
-        diff12 = vsubq_s16(src0_vec, src1_vec);
-        diff34  = vsubq_s16(src2_vec, src3_vec);
-        comp0 = (int16x8_t)vcgeq_s16(diff12, zeros);
-        comp1 = (int16x8_t)vcltq_s16(diff12, zeros);
-        comp2 = (int16x8_t)vcgeq_s16(diff34, zeros);
-        comp3 = (int16x8_t)vcltq_s16(diff34, zeros);
-        comp0 = vandq_s16(src0_vec, comp0);
-        comp1 = vandq_s16(src1_vec, comp1);
-        comp2 = vandq_s16(src2_vec, comp2);
-        comp3 = vandq_s16(src3_vec, comp3);
-
-        result1_vec = vaddq_s16(comp0, comp1);
-        result2_vec = vaddq_s16(comp2, comp3);
-
-        diff12 = vsubq_s16(result1_vec, result2_vec);
-        comp0 = (int16x8_t)vcgeq_s16(diff12, zeros);
-        comp1 = (int16x8_t)vcltq_s16(diff12, zeros);
-        comp0 = vandq_s16(result1_vec, comp0);
-        comp1 = vandq_s16(result2_vec, comp1);
-        result1_vec = vaddq_s16(comp0, comp1);
-        vst1q_s16(target, result1_vec);
-    src0 += 8;
-    src1 += 8;
-    src2 += 8;
-    src3 += 8;
-    target += 8;
-    }
-
-
-    short temp0 = 0;
-    short temp1 = 0;
-    for(i=eighth_points*8; i < num_points; ++i) {
-      temp0 = ((short)(*src0 - *src1) > 0) ? *src0 : *src1;
-      temp1 = ((short)(*src2 - *src3) > 0) ? *src2 : *src3;
-      *target++ = ((short)(temp0 - temp1)>0) ? temp0 : temp1;
-    src0++;
-    src1++;
-    src2++;
-    src3++;
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_16i_x4_quad_max_star_16i_generic(short* target, short* 
src0, short* src1, short* src2, short* src3, unsigned int num_points) {
-
-       const unsigned int num_bytes = num_points*2;
-
-       int i = 0;
-
-       int bound = num_bytes >> 1;
-
-       short temp0 = 0;
-       short temp1 = 0;
-       for(i = 0; i < bound; ++i) {
-         temp0 = ((short)(src0[i] - src1[i]) > 0) ? src0[i] : src1[i];
-         temp1 = ((short)(src2[i] - src3[i])>0) ? src2[i] : src3[i];
-         target[i] = ((short)(temp0 - temp1)>0) ? temp0 : temp1;
-       }
-}
-
-
-
-
-#endif /*LV_HAVE_GENERIC*/
-
-#endif /*INCLUDED_volk_16i_x4_quad_max_star_16i_a_H*/
diff --git a/volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h 
b/volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h
deleted file mode 100644
index 6a056ff..0000000
--- a/volk/kernels/volk/volk_16i_x5_add_quad_16i_x4.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H
-#define INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H
-
-
-#include<inttypes.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_SSE2
-#include<xmmintrin.h>
-#include<emmintrin.h>
-
-static inline  void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0, short* 
target1, short* target2, short* target3, short* src0, short* src1, short* src2, 
short* src3, short* src4, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*2;
-
-  __m128i xmm0, xmm1, xmm2, xmm3, xmm4;
-  __m128i *p_target0, *p_target1, *p_target2, *p_target3,  *p_src0, *p_src1, 
*p_src2, *p_src3, *p_src4;
-  p_target0 = (__m128i*)target0;
-  p_target1 = (__m128i*)target1;
-  p_target2 = (__m128i*)target2;
-  p_target3 = (__m128i*)target3;
-
-  p_src0 = (__m128i*)src0;
-  p_src1 = (__m128i*)src1;
-  p_src2 = (__m128i*)src2;
-  p_src3 = (__m128i*)src3;
-  p_src4 = (__m128i*)src4;
-
-  int i = 0;
-
-  int bound = (num_bytes >> 4);
-  int leftovers = (num_bytes >> 1) & 7;
-
-  for(; i < bound; ++i) {
-    xmm0 = _mm_load_si128(p_src0);
-    xmm1 = _mm_load_si128(p_src1);
-    xmm2 = _mm_load_si128(p_src2);
-    xmm3 = _mm_load_si128(p_src3);
-    xmm4 = _mm_load_si128(p_src4);
-
-    p_src0 += 1;
-    p_src1 += 1;
-
-    xmm1 = _mm_add_epi16(xmm0, xmm1);
-    xmm2 = _mm_add_epi16(xmm0, xmm2);
-    xmm3 = _mm_add_epi16(xmm0, xmm3);
-    xmm4 = _mm_add_epi16(xmm0, xmm4);
-
-
-    p_src2 += 1;
-    p_src3 += 1;
-    p_src4 += 1;
-
-    _mm_store_si128(p_target0, xmm1);
-    _mm_store_si128(p_target1, xmm2);
-    _mm_store_si128(p_target2, xmm3);
-    _mm_store_si128(p_target3, xmm4);
-
-    p_target0 += 1;
-    p_target1 += 1;
-    p_target2 += 1;
-    p_target3 += 1;
-  }
-    /*asm volatile
-               (
-                ".%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1:\n\t"
-                "cmp $0, %[bound]\n\t"
-                "je .%=volk_16i_x5_add_quad_16i_x4_a_sse2_END\n\t"
-                "movaps (%[src0]), %%xmm1\n\t"
-                "movaps (%[src1]), %%xmm2\n\t"
-                "movaps (%[src2]), %%xmm3\n\t"
-                "movaps (%[src3]), %%xmm4\n\t"
-                "movaps (%[src4]), %%xmm5\n\t"
-                "add $16, %[src0]\n\t"
-                "add $16, %[src1]\n\t"
-                "add $16, %[src2]\n\t"
-                "add $16, %[src3]\n\t"
-                "add $16, %[src4]\n\t"
-                "paddw %%xmm1, %%xmm2\n\t"
-                "paddw %%xmm1, %%xmm3\n\t"
-                "paddw %%xmm1, %%xmm4\n\t"
-                "paddw %%xmm1, %%xmm5\n\t"
-                "add $-1, %[bound]\n\t"
-                "movaps %%xmm2, (%[target0])\n\t"
-                "movaps %%xmm3, (%[target1])\n\t"
-                "movaps %%xmm4, (%[target2])\n\t"
-                "movaps %%xmm5, (%[target3])\n\t"
-                "add $16, %[target0]\n\t"
-                "add $16, %[target1]\n\t"
-                "add $16, %[target2]\n\t"
-                "add $16, %[target3]\n\t"
-                "jmp .%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1\n\t"
-                ".%=volk_16i_x5_add_quad_16i_x4_a_sse2_END:\n\t"
-                :
-                :[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), 
[src2]"r"(src2), [src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), 
[target1]"r"(target1), [target2]"r"(target2), [target3]"r"(target3)
-                :"xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
-                );
-
-    */
-
-
-  for(i = bound * 8; i < (bound * 8) + leftovers; ++i) {
-    target0[i] = src0[i] + src1[i];
-    target1[i] = src0[i] + src2[i];
-    target2[i] = src0[i] + src3[i];
-    target3[i] = src0[i] + src4[i];
-  }
-}
-#endif /*LV_HAVE_SSE2*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_16i_x5_add_quad_16i_x4_neon(short* target0, short* 
target1, short* target2, short* target3, short* src0, short* src1, short* src2, 
short* src3, short* src4, unsigned int num_points) {
-
-    const unsigned int eighth_points = num_points / 8;
-    unsigned int number = 0;
-
-    int16x8_t src0_vec, src1_vec, src2_vec, src3_vec, src4_vec;
-    int16x8_t target0_vec, target1_vec, target2_vec, target3_vec;
-    for(number = 0; number < eighth_points; ++number) {
-        src0_vec = vld1q_s16(src0);
-        src1_vec = vld1q_s16(src1);
-        src2_vec = vld1q_s16(src2);
-        src3_vec = vld1q_s16(src3);
-        src4_vec = vld1q_s16(src4);
-
-        target0_vec = vaddq_s16(src0_vec , src1_vec);
-        target1_vec = vaddq_s16(src0_vec , src2_vec);
-        target2_vec = vaddq_s16(src0_vec , src3_vec);
-        target3_vec = vaddq_s16(src0_vec , src4_vec);
-
-        vst1q_s16(target0, target0_vec);
-        vst1q_s16(target1, target1_vec);
-        vst1q_s16(target2, target2_vec);
-        vst1q_s16(target3, target3_vec);
-        src0 += 8;
-        src1 += 8;
-        src2 += 8;
-        src3 += 8;
-        src4 += 8;
-        target0 += 8;
-        target1 += 8;
-        target2 += 8;
-        target3 += 8;
-    }
-
-    for(number = eighth_points * 8; number < num_points; ++number) {
-        *target0++ = *src0 + *src1++;
-        *target1++ = *src0 + *src2++;
-        *target2++ = *src0 + *src3++;
-        *target3++ = *src0++ + *src4++;
-    }
-}
-
-#endif /* LV_HAVE_NEON */
-
-
-#ifdef LV_HAVE_GENERIC
-
-static inline void volk_16i_x5_add_quad_16i_x4_generic(short* target0, short* 
target1, short* target2, short* target3, short* src0, short* src1, short* src2, 
short* src3, short* src4, unsigned int num_points) {
-
-       const unsigned int num_bytes = num_points*2;
-
-       int i = 0;
-
-       int bound = num_bytes >> 1;
-
-       for(i = 0; i < bound; ++i) {
-               target0[i] = src0[i] + src1[i];
-               target1[i] = src0[i] + src2[i];
-               target2[i] = src0[i] + src3[i];
-               target3[i] = src0[i] + src4[i];
-       }
-}
-
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-
-#endif /*INCLUDED_volk_16i_x5_add_quad_16i_x4_a_H*/
diff --git a/volk/kernels/volk/volk_16ic_deinterleave_16i_x2.h 
b/volk/kernels/volk/volk_16ic_deinterleave_16i_x2.h
deleted file mode 100644
index 26521e2..0000000
--- a/volk/kernels/volk/volk_16ic_deinterleave_16i_x2.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
-#define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSSE3
-#include <tmmintrin.h>
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_deinterleave_16i_x2_a_ssse3(int16_t* iBuffer, 
int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  int16_t* qBufferPtr = qBuffer;
-
-  __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 13, 12, 9, 8, 5, 4, 1, 0);
-  __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-
-  __m128i qMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 15, 14, 11, 10, 7, 6, 3, 2);
-  __m128i qMoveMask2 = _mm_set_epi8(15, 14, 11, 10, 7, 6, 3, 2, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-
-  __m128i complexVal1, complexVal2, iOutputVal, qOutputVal;
-
-  unsigned int eighthPoints = num_points / 8;
-
-  for(number = 0; number < eighthPoints; number++){
-    complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 16;
-    complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 16;
-
-    iOutputVal = _mm_or_si128( _mm_shuffle_epi8(complexVal1, iMoveMask1) , 
_mm_shuffle_epi8(complexVal2, iMoveMask2));
-    qOutputVal = _mm_or_si128( _mm_shuffle_epi8(complexVal1, qMoveMask1) , 
_mm_shuffle_epi8(complexVal2, qMoveMask2));
-
-    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
-    _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
-
-    iBufferPtr += 8;
-    qBufferPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *int16ComplexVectorPtr++;
-    *qBufferPtr++ = *int16ComplexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSSE3 */
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_deinterleave_16i_x2_a_sse2(int16_t* iBuffer, 
int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int16_t* complexVectorPtr = (int16_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  int16_t* qBufferPtr = qBuffer;
-  __m128i complexVal1, complexVal2, iComplexVal1, iComplexVal2, qComplexVal1, 
qComplexVal2, iOutputVal, qOutputVal;
-  __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
-  __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
-
-  unsigned int eighthPoints = num_points / 8;
-
-  for(number = 0; number < eighthPoints; number++){
-    complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 8;
-    complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 8;
-
-    iComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3,1,2,0));
-
-    iComplexVal1 = _mm_shufflehi_epi16(iComplexVal1, _MM_SHUFFLE(3,1,2,0));
-
-    iComplexVal1 = _mm_shuffle_epi32(iComplexVal1, _MM_SHUFFLE(3,1,2,0));
-
-    iComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3,1,2,0));
-
-    iComplexVal2 = _mm_shufflehi_epi16(iComplexVal2, _MM_SHUFFLE(3,1,2,0));
-
-    iComplexVal2 = _mm_shuffle_epi32(iComplexVal2, _MM_SHUFFLE(2,0,3,1));
-
-    iOutputVal = _mm_or_si128(_mm_and_si128(iComplexVal1, lowMask), 
_mm_and_si128(iComplexVal2, highMask));
-
-    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
-
-    qComplexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(2,0,3,1));
-
-    qComplexVal1 = _mm_shufflehi_epi16(qComplexVal1, _MM_SHUFFLE(2,0,3,1));
-
-    qComplexVal1 = _mm_shuffle_epi32(qComplexVal1, _MM_SHUFFLE(3,1,2,0));
-
-    qComplexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(2,0,3,1));
-
-    qComplexVal2 = _mm_shufflehi_epi16(qComplexVal2, _MM_SHUFFLE(2,0,3,1));
-
-    qComplexVal2 = _mm_shuffle_epi32(qComplexVal2, _MM_SHUFFLE(2,0,3,1));
-
-    qOutputVal = _mm_or_si128(_mm_and_si128(qComplexVal1, lowMask), 
_mm_and_si128(qComplexVal2, highMask));
-
-    _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
-
-    iBufferPtr += 8;
-    qBufferPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_deinterleave_16i_x2_generic(int16_t* iBuffer, 
int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  int16_t* qBufferPtr = qBuffer;
-  unsigned int number;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer, 
int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points);
-static inline void volk_16ic_deinterleave_16i_x2_u_orc(int16_t* iBuffer, 
int16_t* qBuffer, const lv_16sc_t* complexVector, unsigned int num_points){
-    volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, 
num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_16ic_deinterleave_16i_x2_a_H */
diff --git a/volk/kernels/volk/volk_16ic_deinterleave_real_16i.h 
b/volk/kernels/volk/volk_16ic_deinterleave_real_16i.h
deleted file mode 100644
index 5c29887..0000000
--- a/volk/kernels/volk/volk_16ic_deinterleave_real_16i.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H
-#define INCLUDED_volk_16ic_deinterleave_real_16i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSSE3
-#include <tmmintrin.h>
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_deinterleave_real_16i_a_ssse3(int16_t* iBuffer, 
const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int16_t* complexVectorPtr = (int16_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-
-  __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 13, 12, 9, 8, 5, 4, 1, 0);
-  __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-
-  __m128i complexVal1, complexVal2, iOutputVal;
-
-  unsigned int eighthPoints = num_points / 8;
-
-  for(number = 0; number < eighthPoints; number++){
-    complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 8;
-    complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 8;
-
-    complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
-    complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
-
-    iOutputVal = _mm_or_si128(complexVal1, complexVal2);
-
-    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
-
-    iBufferPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSSE3 */
-
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_deinterleave_real_16i_a_sse2(int16_t* iBuffer, 
const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int16_t* complexVectorPtr = (int16_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  __m128i complexVal1, complexVal2, iOutputVal;
-  __m128i lowMask = _mm_set_epi32(0x0, 0x0, 0xFFFFFFFF, 0xFFFFFFFF);
-  __m128i highMask = _mm_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0x0, 0x0);
-
-  unsigned int eighthPoints = num_points / 8;
-
-  for(number = 0; number < eighthPoints; number++){
-    complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 8;
-    complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 8;
-
-    complexVal1 = _mm_shufflelo_epi16(complexVal1, _MM_SHUFFLE(3,1,2,0));
-
-    complexVal1 = _mm_shufflehi_epi16(complexVal1, _MM_SHUFFLE(3,1,2,0));
-
-    complexVal1 = _mm_shuffle_epi32(complexVal1, _MM_SHUFFLE(3,1,2,0));
-
-    complexVal2 = _mm_shufflelo_epi16(complexVal2, _MM_SHUFFLE(3,1,2,0));
-
-    complexVal2 = _mm_shufflehi_epi16(complexVal2, _MM_SHUFFLE(3,1,2,0));
-
-    complexVal2 = _mm_shuffle_epi32(complexVal2, _MM_SHUFFLE(2,0,3,1));
-
-    iOutputVal = _mm_or_si128(_mm_and_si128(complexVal1, lowMask), 
_mm_and_si128(complexVal2, highMask));
-
-    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
-
-    iBufferPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_deinterleave_real_16i_generic(int16_t* iBuffer, 
const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int16_t* complexVectorPtr = (int16_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_16ic_deinterleave_real_16i_a_H */
diff --git a/volk/kernels/volk/volk_16ic_deinterleave_real_8i.h 
b/volk/kernels/volk/volk_16ic_deinterleave_real_8i.h
deleted file mode 100644
index b2b1e2a..0000000
--- a/volk/kernels/volk/volk_16ic_deinterleave_real_8i.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16ic_deinterleave_real_8i_a_H
-#define INCLUDED_volk_16ic_deinterleave_real_8i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSSE3
-#include <tmmintrin.h>
-/*!
-  \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer, 
const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int8_t* iBufferPtr = iBuffer;
-  __m128i iMoveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 13, 12, 9, 8, 5, 4, 1, 0);
-  __m128i iMoveMask2 = _mm_set_epi8(13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-  __m128i complexVal1, complexVal2, complexVal3, complexVal4, iOutputVal;
-
-  unsigned int sixteenthPoints = num_points / 16;
-
-  for(number = 0; number < sixteenthPoints; number++){
-    complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 16;
-    complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 16;
-
-    complexVal3 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 16;
-    complexVal4 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 16;
-
-    complexVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask1);
-    complexVal2 = _mm_shuffle_epi8(complexVal2, iMoveMask2);
-
-    complexVal1 = _mm_or_si128(complexVal1, complexVal2);
-
-    complexVal3 = _mm_shuffle_epi8(complexVal3, iMoveMask1);
-    complexVal4 = _mm_shuffle_epi8(complexVal4, iMoveMask2);
-
-    complexVal3 = _mm_or_si128(complexVal3, complexVal4);
-
-
-    complexVal1 = _mm_srai_epi16(complexVal1, 8);
-    complexVal3 = _mm_srai_epi16(complexVal3, 8);
-
-    iOutputVal = _mm_packs_epi16(complexVal1, complexVal3);
-
-    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);
-
-    iBufferPtr += 16;
-  }
-
-  number = sixteenthPoints * 16;
-  int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = ((int8_t)(*int16ComplexVectorPtr++ >> 8));
-    int16ComplexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSSE3 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_deinterleave_real_8i_generic(int8_t* iBuffer, 
const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  int16_t* complexVectorPtr = (int16_t*)complexVector;
-  int8_t* iBufferPtr = iBuffer;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = ((int8_t)(*complexVectorPtr++ >> 8));
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Deinterleaves the complex 16 bit vector into 8 bit I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-extern void volk_16ic_deinterleave_real_8i_a_orc_impl(int8_t* iBuffer, const 
lv_16sc_t* complexVector, unsigned int num_points);
-static inline void volk_16ic_deinterleave_real_8i_u_orc(int8_t* iBuffer, const 
lv_16sc_t* complexVector, unsigned int num_points){
-    volk_16ic_deinterleave_real_8i_a_orc_impl(iBuffer, complexVector, 
num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_16ic_deinterleave_real_8i_a_H */
diff --git a/volk/kernels/volk/volk_16ic_magnitude_16i.h 
b/volk/kernels/volk/volk_16ic_magnitude_16i.h
deleted file mode 100644
index fb29bfd..0000000
--- a/volk/kernels/volk/volk_16ic_magnitude_16i.h
+++ /dev/null
@@ -1,213 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16ic_magnitude_16i_a_H
-#define INCLUDED_volk_16ic_magnitude_16i_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-/*!
-  \brief Calculates the magnitude of the complexVector and stores the results 
in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param magnitudeVector The vector containing the real output values
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_16ic_magnitude_16i_a_sse3(int16_t* magnitudeVector, 
const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  int16_t* magnitudeVectorPtr = magnitudeVector;
-
-  __m128 vScalar = _mm_set_ps1(32768.0);
-  __m128 invScalar = _mm_set_ps1(1.0/32768.0);
-
-  __m128 cplxValue1, cplxValue2, result;
-
-  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
-  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-
-    inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
-    inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
-    inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
-    inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-
-    inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
-    inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
-    inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
-    inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
-
-    cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
-    cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
-
-    complexVectorPtr += 8;
-
-    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
-    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
-
-    cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
-    cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-    result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
-
-    result = _mm_sqrt_ps(result); // Square root the values
-
-    result = _mm_mul_ps(result, vScalar); // Scale the results
-
-    _mm_store_ps(outputFloatBuffer, result);
-    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
-    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
-    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
-    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  magnitudeVectorPtr = &magnitudeVector[number];
-  complexVectorPtr = (const int16_t*)&complexVector[number];
-  for(; number < num_points; number++){
-    const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
-    const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
-    const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag)) * 32768.0;
-    *magnitudeVectorPtr++ = (int16_t)(val1Result);
-  }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Calculates the magnitude of the complexVector and stores the results 
in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param magnitudeVector The vector containing the real output values
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_16ic_magnitude_16i_a_sse(int16_t* magnitudeVector, 
const lv_16sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  int16_t* magnitudeVectorPtr = magnitudeVector;
-
-  __m128 vScalar = _mm_set_ps1(32768.0);
-  __m128 invScalar = _mm_set_ps1(1.0/32768.0);
-
-  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
-
-  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[4];
-  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-
-    inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
-    inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
-    inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
-    inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-
-    cplxValue1 = _mm_load_ps(inputFloatBuffer);
-    complexVectorPtr += 4;
-
-    inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
-    inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
-    inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
-    inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-
-    cplxValue2 = _mm_load_ps(inputFloatBuffer);
-    complexVectorPtr += 4;
-
-    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
-    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
-
-    // Arrange in i1i2i3i4 format
-    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-    // Arrange in q1q2q3q4 format
-    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-    iValue = _mm_mul_ps(iValue, iValue); // Square the I values
-    qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
-
-    result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
-
-    result = _mm_sqrt_ps(result); // Square root the values
-
-    result = _mm_mul_ps(result, vScalar); // Scale the results
-
-    _mm_store_ps(outputFloatBuffer, result);
-    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[0]);
-    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[1]);
-    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[2]);
-    *magnitudeVectorPtr++ = (int16_t)(outputFloatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  magnitudeVectorPtr = &magnitudeVector[number];
-  complexVectorPtr = (const int16_t*)&complexVector[number];
-  for(; number < num_points; number++){
-    const float val1Real = (float)(*complexVectorPtr++) / 32768.0;
-    const float val1Imag = (float)(*complexVectorPtr++) / 32768.0;
-    const float val1Result = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag)) * 32768.0;
-    *magnitudeVectorPtr++ = (int16_t)(val1Result);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Calculates the magnitude of the complexVector and stores the results 
in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param magnitudeVector The vector containing the real output values
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_16ic_magnitude_16i_generic(int16_t* magnitudeVector, 
const lv_16sc_t* complexVector, unsigned int num_points){
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  int16_t* magnitudeVectorPtr = magnitudeVector;
-  unsigned int number = 0;
-  const float scalar = 32768.0;
-  for(number = 0; number < num_points; number++){
-    float real = ((float)(*complexVectorPtr++)) / scalar;
-    float imag = ((float)(*complexVectorPtr++)) / scalar;
-    *magnitudeVectorPtr++ = (int16_t)(sqrtf((real*real) + (imag*imag)) * 
scalar);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC_DISABLED
-/*!
-  \brief Calculates the magnitude of the complexVector and stores the results 
in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param magnitudeVector The vector containing the real output values
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-extern void volk_16ic_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, const 
lv_16sc_t* complexVector, float scalar, unsigned int num_points);
-static inline void volk_16ic_magnitude_16i_u_orc(int16_t* magnitudeVector, 
const lv_16sc_t* complexVector, unsigned int num_points){
-    volk_16ic_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 
32768.0, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_16ic_magnitude_16i_a_H */
diff --git a/volk/kernels/volk/volk_16ic_s32f_deinterleave_32f_x2.h 
b/volk/kernels/volk/volk_16ic_s32f_deinterleave_32f_x2.h
deleted file mode 100644
index cc702de..0000000
--- a/volk/kernels/volk/volk_16ic_s32f_deinterleave_32f_x2.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H
-#define INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Converts the complex 16 bit vector into floats,scales each data 
point, and deinterleaves into I & Q vector data
-    \param complexVector The complex input vector
-    \param iBuffer The I buffer output data
-    \param qBuffer The Q buffer output data
-    \param scalar The data value to be divided against each input data value 
of the input complex vector
-    \param num_points The number of complex data values to be deinterleaved
-  */
-static inline void volk_16ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, 
float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned 
int num_points){
-    float* iBufferPtr = iBuffer;
-    float* qBufferPtr = qBuffer;
-
-    uint64_t number = 0;
-    const uint64_t quarterPoints = num_points / 4;
-    __m128 cplxValue1, cplxValue2, iValue, qValue;
-
-    __m128 invScalar = _mm_set_ps1(1.0/scalar);
-    int16_t* complexVectorPtr = (int16_t*)complexVector;
-
-    __VOLK_ATTR_ALIGNED(16) float floatBuffer[8];
-
-    for(;number < quarterPoints; number++){
-
-      floatBuffer[0] = (float)(complexVectorPtr[0]);
-      floatBuffer[1] = (float)(complexVectorPtr[1]);
-      floatBuffer[2] = (float)(complexVectorPtr[2]);
-      floatBuffer[3] = (float)(complexVectorPtr[3]);
-
-      floatBuffer[4] = (float)(complexVectorPtr[4]);
-      floatBuffer[5] = (float)(complexVectorPtr[5]);
-      floatBuffer[6] = (float)(complexVectorPtr[6]);
-      floatBuffer[7] = (float)(complexVectorPtr[7]);
-
-      cplxValue1 = _mm_load_ps(&floatBuffer[0]);
-      cplxValue2 = _mm_load_ps(&floatBuffer[4]);
-
-      complexVectorPtr += 8;
-
-      cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
-      cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
-
-      // Arrange in i1i2i3i4 format
-      iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-      // Arrange in q1q2q3q4 format
-      qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-      _mm_store_ps(iBufferPtr, iValue);
-      _mm_store_ps(qBufferPtr, qValue);
-
-      iBufferPtr += 4;
-      qBufferPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    complexVectorPtr = (int16_t*)&complexVector[number];
-    for(; number < num_points; number++){
-      *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
-      *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the complex 16 bit vector into floats,scales each data 
point, and deinterleaves into I & Q vector data
-    \param complexVector The complex input vector
-    \param iBuffer The I buffer output data
-    \param qBuffer The Q buffer output data
-    \param scalar The data value to be divided against each input data value 
of the input complex vector
-    \param num_points The number of complex data values to be deinterleaved
-  */
-static inline void volk_16ic_s32f_deinterleave_32f_x2_generic(float* iBuffer, 
float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned 
int num_points){
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  float* iBufferPtr = iBuffer;
-  float* qBufferPtr = qBuffer;
-  unsigned int number;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
-    *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-  /*!
-    \brief Converts the complex 16 bit vector into floats,scales each data 
point, and deinterleaves into I & Q vector data
-    \param complexVector The complex input vector
-    \param iBuffer The I buffer output data
-    \param qBuffer The Q buffer output data
-    \param scalar The data value to be divided against each input data value 
of the input complex vector
-    \param num_points The number of complex data values to be deinterleaved
-  */
-extern void volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(float* iBuffer, 
float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned 
int num_points);
-static inline void volk_16ic_s32f_deinterleave_32f_x2_u_orc(float* iBuffer, 
float* qBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned 
int num_points){
-    volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl(iBuffer, qBuffer, 
complexVector, scalar, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_16ic_s32f_deinterleave_32f_x2_a_H */
diff --git a/volk/kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h 
b/volk/kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h
deleted file mode 100644
index 2dd9188..0000000
--- a/volk/kernels/volk/volk_16ic_s32f_deinterleave_real_32f.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
-#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I float vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* 
iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int 
num_points){
-  float* iBufferPtr = iBuffer;
-
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  __m128 iFloatValue;
-
-  const float iScalar= 1.0 / scalar;
-  __m128 invScalar = _mm_set_ps1(iScalar);
-  __m128i complexVal, iIntVal;
-  int8_t* complexVectorPtr = (int8_t*)complexVector;
-
-  __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 13, 12, 9, 8, 5, 4, 1, 0);
-
-  for(;number < quarterPoints; number++){
-    complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr 
+= 16;
-    complexVal = _mm_shuffle_epi8(complexVal, moveMask);
-
-    iIntVal = _mm_cvtepi16_epi32(complexVal);
-    iFloatValue = _mm_cvtepi32_ps(iIntVal);
-
-    iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
-
-    _mm_store_ps(iBufferPtr, iFloatValue);
-
-    iBufferPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
-  for(; number < num_points; number++){
-    *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
-    sixteenTComplexVectorPtr++;
-  }
-
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I float vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, 
const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
-  float* iBufferPtr = iBuffer;
-
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 iValue;
-
-  const float iScalar = 1.0/scalar;
-  __m128 invScalar = _mm_set_ps1(iScalar);
-  int16_t* complexVectorPtr = (int16_t*)complexVector;
-
-  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-    floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-    floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-    floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-
-    iValue = _mm_load_ps(floatBuffer);
-
-    iValue = _mm_mul_ps(iValue, invScalar);
-
-    _mm_store_ps(iBufferPtr, iValue);
-
-    iBufferPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  complexVectorPtr = (int16_t*)&complexVector[number];
-  for(; number < num_points; number++){
-    *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
-    complexVectorPtr++;
-  }
-
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 16 bit vector into I float vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_16ic_s32f_deinterleave_real_32f_generic(float* 
iBuffer, const lv_16sc_t* complexVector, const float scalar, unsigned int 
num_points){
-  unsigned int number = 0;
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  float* iBufferPtr = iBuffer;
-  const float invScalar = 1.0 / scalar;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */
diff --git a/volk/kernels/volk/volk_16ic_s32f_magnitude_32f.h 
b/volk/kernels/volk/volk_16ic_s32f_magnitude_32f.h
deleted file mode 100644
index ba06450..0000000
--- a/volk/kernels/volk/volk_16ic_s32f_magnitude_32f.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
-#define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-/*!
-  \brief Calculates the magnitude of the complexVector and stores the results 
in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param magnitudeVector The vector containing the real output values
-  \param scalar The data value to be divided against each input data value of 
the input complex vector
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector, 
const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  float* magnitudeVectorPtr = magnitudeVector;
-
-  __m128 invScalar = _mm_set_ps1(1.0/scalar);
-
-  __m128 cplxValue1, cplxValue2, result;
-
-  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
-
-  for(;number < quarterPoints; number++){
-
-    inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
-    inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
-    inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
-    inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-
-    inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
-    inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
-    inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
-    inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
-
-    cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
-    cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
-
-    complexVectorPtr += 8;
-
-    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
-    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
-
-    cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
-    cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-    result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
-
-    result = _mm_sqrt_ps(result); // Square root the values
-
-    _mm_store_ps(magnitudeVectorPtr, result);
-
-    magnitudeVectorPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  magnitudeVectorPtr = &magnitudeVector[number];
-  complexVectorPtr = (const int16_t*)&complexVector[number];
-  for(; number < num_points; number++){
-    float val1Real = (float)(*complexVectorPtr++) / scalar;
-    float val1Imag = (float)(*complexVectorPtr++) / scalar;
-    *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag));
-  }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Calculates the magnitude of the complexVector and stores the results 
in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param magnitudeVector The vector containing the real output values
-  \param scalar The data value to be divided against each input data value of 
the input complex vector
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector, 
const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  float* magnitudeVectorPtr = magnitudeVector;
-
-  const float iScalar = 1.0 / scalar;
-  __m128 invScalar = _mm_set_ps1(iScalar);
-
-  __m128 cplxValue1, cplxValue2, result, re, im;
-
-  __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
-
-  for(;number < quarterPoints; number++){
-    inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
-    inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
-    inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
-    inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
-
-    inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
-    inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
-    inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
-    inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
-
-    cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
-    cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
-
-    re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
-    im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
-
-    complexVectorPtr += 8;
-
-    cplxValue1 = _mm_mul_ps(re, invScalar);
-    cplxValue2 = _mm_mul_ps(im, invScalar);
-
-    cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
-    cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-    result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
-
-    result = _mm_sqrt_ps(result); // Square root the values
-
-    _mm_store_ps(magnitudeVectorPtr, result);
-
-    magnitudeVectorPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  magnitudeVectorPtr = &magnitudeVector[number];
-  complexVectorPtr = (const int16_t*)&complexVector[number];
-  for(; number < num_points; number++){
-    float val1Real = (float)(*complexVectorPtr++) * iScalar;
-    float val1Imag = (float)(*complexVectorPtr++) * iScalar;
-    *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag));
-  }
-}
-
-
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Calculates the magnitude of the complexVector and stores the results 
in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param magnitudeVector The vector containing the real output values
-  \param scalar The data value to be divided against each input data value of 
the input complex vector
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_16ic_s32f_magnitude_32f_generic(float* 
magnitudeVector, const lv_16sc_t* complexVector, const float scalar, unsigned 
int num_points){
-  const int16_t* complexVectorPtr = (const int16_t*)complexVector;
-  float* magnitudeVectorPtr = magnitudeVector;
-  unsigned int number = 0;
-  const float invScalar = 1.0 / scalar;
-  for(number = 0; number < num_points; number++){
-    float real = ( (float) (*complexVectorPtr++)) * invScalar;
-    float imag = ( (float) (*complexVectorPtr++)) * invScalar;
-    *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC_DISABLED
-/*!
-  \brief Calculates the magnitude of the complexVector and stores the results 
in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param magnitudeVector The vector containing the real output values
-  \param scalar The data value to be divided against each input data value of 
the input complex vector
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector, 
const lv_16sc_t* complexVector, const float scalar, unsigned int num_points);
-static inline void volk_16ic_s32f_magnitude_32f_u_orc(float* magnitudeVector, 
const lv_16sc_t* complexVector, const float scalar, unsigned int num_points){
-    volk_16ic_s32f_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, 
scalar, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */
diff --git a/volk/kernels/volk/volk_16u_byteswap.h 
b/volk/kernels/volk/volk_16u_byteswap.h
deleted file mode 100644
index 3b2f9e2..0000000
--- a/volk/kernels/volk/volk_16u_byteswap.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_16u_byteswap_u_H
-#define INCLUDED_volk_16u_byteswap_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-
-/*!
-  \brief Byteswaps (in-place) an unaligned vector of int16_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_16u_byteswap_u_sse2(uint16_t* intsToSwap, unsigned int 
num_points){
-  unsigned int number = 0;
-  uint16_t* inputPtr = intsToSwap;
-  __m128i input, left, right, output;
-
-  const unsigned int eighthPoints = num_points / 8;
-  for(;number < eighthPoints; number++){
-    // Load the 16t values, increment inputPtr later since we're doing it 
in-place.
-    input = _mm_loadu_si128((__m128i*)inputPtr);
-    // Do the two shifts
-    left = _mm_slli_epi16(input, 8);
-    right = _mm_srli_epi16(input, 8);
-    // Or the left and right halves together
-    output = _mm_or_si128(left, right);
-    // Store the results
-    _mm_storeu_si128((__m128i*)inputPtr, output);
-    inputPtr += 8;
-  }
-
-  // Byteswap any remaining points:
-  number = eighthPoints*8;
-  for(; number < num_points; number++){
-    uint16_t outputVal = *inputPtr;
-    outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
-    *inputPtr = outputVal;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Byteswaps (in-place) an unaligned vector of int16_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_16u_byteswap_generic(uint16_t* intsToSwap, unsigned 
int num_points){
-  unsigned int point;
-  uint16_t* inputPtr = intsToSwap;
-  for(point = 0; point < num_points; point++){
-    uint16_t output = *inputPtr;
-    output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
-    *inputPtr = output;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_16u_byteswap_u_H */
-#ifndef INCLUDED_volk_16u_byteswap_a_H
-#define INCLUDED_volk_16u_byteswap_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int16_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_16u_byteswap_a_sse2(uint16_t* intsToSwap, unsigned int 
num_points){
-  unsigned int number = 0;
-  uint16_t* inputPtr = intsToSwap;
-  __m128i input, left, right, output;
-
-  const unsigned int eighthPoints = num_points / 8;
-  for(;number < eighthPoints; number++){
-    // Load the 16t values, increment inputPtr later since we're doing it 
in-place.
-    input = _mm_load_si128((__m128i*)inputPtr);
-    // Do the two shifts
-    left = _mm_slli_epi16(input, 8);
-    right = _mm_srli_epi16(input, 8);
-    // Or the left and right halves together
-    output = _mm_or_si128(left, right);
-    // Store the results
-    _mm_store_si128((__m128i*)inputPtr, output);
-    inputPtr += 8;
-  }
-
-
-  // Byteswap any remaining points:
-  number = eighthPoints*8;
-  for(; number < num_points; number++){
-    uint16_t outputVal = *inputPtr;
-    outputVal = (((outputVal >> 8) & 0xff) | ((outputVal << 8) & 0xff00));
-    *inputPtr = outputVal;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Byteswaps (in-place) an unaligned vector of int16_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_16u_byteswap_neon(uint16_t* intsToSwap, unsigned int 
num_points){
-  unsigned int number;
-  unsigned int eighth_points = num_points / 8;
-  uint16x8_t input, output;
-  uint16_t* inputPtr = intsToSwap;
-
-  for(number = 0; number < eighth_points; number++) {
-    input = vld1q_u16(inputPtr);
-    output = vsriq_n_u16(output, input, 8);
-    output = vsliq_n_u16(output, input, 8);
-    vst1q_u16(inputPtr, output);
-    inputPtr += 8;
-  }
-
-  for(number = eighth_points * 8; number < num_points; number++){
-    uint16_t output = *inputPtr;
-    output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
-    *inputPtr = output;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int32_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_16u_byteswap_neon_table(uint16_t* intsToSwap, unsigned 
int num_points){
-  uint16_t* inputPtr = intsToSwap;
-  unsigned int number = 0;
-  unsigned int n16points = num_points / 16;
-
-  uint8x8x4_t input_table;
-  uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
-  uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;
-
-  /* these magic numbers are used as byte-indeces in the LUT.
-     they are pre-computed to save time. A simple C program
-     can calculate them; for example for lookup01:
-    uint8_t chars[8] = {24, 16, 8, 0, 25, 17, 9, 1};
-    for(ii=0; ii < 8; ++ii) {
-        index += ((uint64_t)(*(chars+ii))) << (ii*8);
-    }
-  */
-  int_lookup01 = vcreate_u8(1232017111498883080);
-  int_lookup23 = vcreate_u8(1376697457175036426);
-  int_lookup45 = vcreate_u8(1521377802851189772);
-  int_lookup67 = vcreate_u8(1666058148527343118);
-
-  for(number = 0; number < n16points; ++number){
-    input_table = vld4_u8((uint8_t*) inputPtr);
-    swapped_int01 = vtbl4_u8(input_table, int_lookup01);
-    swapped_int23 = vtbl4_u8(input_table, int_lookup23);
-    swapped_int45 = vtbl4_u8(input_table, int_lookup45);
-    swapped_int67 = vtbl4_u8(input_table, int_lookup67);
-    vst1_u8((uint8_t*)inputPtr, swapped_int01);
-    vst1_u8((uint8_t*)(inputPtr+4), swapped_int23);
-    vst1_u8((uint8_t*)(inputPtr+8), swapped_int45);
-    vst1_u8((uint8_t*)(inputPtr+12), swapped_int67);
-
-    inputPtr += 16;
-  }
-
-  for(number = n16points * 16; number < num_points; ++number){
-    uint16_t output = *inputPtr;
-    output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
-    *inputPtr = output;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int16_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_16u_byteswap_a_generic(uint16_t* intsToSwap, unsigned 
int num_points){
-  unsigned int point;
-  uint16_t* inputPtr = intsToSwap;
-  for(point = 0; point < num_points; point++){
-    uint16_t output = *inputPtr;
-    output = (((output >> 8) & 0xff) | ((output << 8) & 0xff00));
-    *inputPtr = output;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int16_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-extern void volk_16u_byteswap_a_orc_impl(uint16_t* intsToSwap, unsigned int 
num_points);
-static inline void volk_16u_byteswap_u_orc(uint16_t* intsToSwap, unsigned int 
num_points){
-    volk_16u_byteswap_a_orc_impl(intsToSwap, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_16u_byteswap_a_H */
diff --git a/volk/kernels/volk/volk_16u_byteswappuppet_16u.h 
b/volk/kernels/volk/volk_16u_byteswappuppet_16u.h
deleted file mode 100644
index 74745d3..0000000
--- a/volk/kernels/volk/volk_16u_byteswappuppet_16u.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef INCLUDED_volk_16u_byteswappuppet_16u_H
-#define INCLUDED_volk_16u_byteswappuppet_16u_H
-
-
-#include <stdint.h>
-#include <volk/volk_16u_byteswap.h>
-#include <string.h>
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_16u_byteswappuppet_16u_generic(uint16_t*output, 
uint16_t* intsToSwap, unsigned int num_points){
-
-    volk_16u_byteswap_generic((uint16_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint16_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_NEON
-static inline void volk_16u_byteswappuppet_16u_neon(uint16_t*output, uint16_t* 
intsToSwap, unsigned int num_points){
-
-    volk_16u_byteswap_neon((uint16_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint16_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_NEON
-static inline void volk_16u_byteswappuppet_16u_neon_table(uint16_t*output, 
uint16_t* intsToSwap, unsigned int num_points){
-
-    volk_16u_byteswap_neon_table((uint16_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint16_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_SSE2
-static inline void volk_16u_byteswappuppet_16u_u_sse2(uint16_t *output, 
uint16_t* intsToSwap, unsigned int num_points){
-
-    volk_16u_byteswap_u_sse2((uint16_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint16_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_SSE2
-static inline void volk_16u_byteswappuppet_16u_a_sse2(uint16_t *output, 
uint16_t* intsToSwap, unsigned int num_points){
-
-    volk_16u_byteswap_a_sse2((uint16_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint16_t));
-
-}
-#endif
-
-
-#endif
diff --git a/volk/kernels/volk/volk_32f_accumulator_s32f.h 
b/volk/kernels/volk/volk_32f_accumulator_s32f.h
deleted file mode 100644
index e8995fb..0000000
--- a/volk/kernels/volk/volk_32f_accumulator_s32f.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_accumulator_s32f_a_H
-#define INCLUDED_volk_32f_accumulator_s32f_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Accumulates the values in the input buffer
-  \param result The accumulated result
-  \param inputBuffer The buffer of data to be accumulated
-  \param num_points The number of values in inputBuffer to be accumulated
-*/
-static inline void volk_32f_accumulator_s32f_a_sse(float* result, const float* 
inputBuffer, unsigned int num_points){
-  float returnValue = 0;
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* aPtr = inputBuffer;
-  __VOLK_ATTR_ALIGNED(16) float tempBuffer[4];
-
-  __m128 accumulator = _mm_setzero_ps();
-  __m128 aVal = _mm_setzero_ps();
-
-  for(;number < quarterPoints; number++){
-    aVal = _mm_load_ps(aPtr);
-    accumulator = _mm_add_ps(accumulator, aVal);
-    aPtr += 4;
-  }
-  _mm_store_ps(tempBuffer,accumulator); // Store the results back into the C 
container
-  returnValue = tempBuffer[0];
-  returnValue += tempBuffer[1];
-  returnValue += tempBuffer[2];
-  returnValue += tempBuffer[3];
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++){
-    returnValue += (*aPtr++);
-  }
-  *result = returnValue;
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Accumulates the values in the input buffer
-  \param result The accumulated result
-  \param inputBuffer The buffer of data to be accumulated
-  \param num_points The number of values in inputBuffer to be accumulated
-*/
-static inline void volk_32f_accumulator_s32f_generic(float* result, const 
float* inputBuffer, unsigned int num_points){
-  const float* aPtr = inputBuffer;
-  unsigned int number = 0;
-  float returnValue = 0;
-
-  for(;number < num_points; number++){
-    returnValue += (*aPtr++);
-  }
-  *result = returnValue;
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_accumulator_s32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_acos_32f.h 
b/volk/kernels/volk/volk_32f_acos_32f.h
deleted file mode 100644
index 1b2b734..0000000
--- a/volk/kernels/volk/volk_32f_acos_32f.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-
-/* This is the number of terms of Taylor series to evaluate, increase this for 
more accuracy*/
-#define ACOS_TERMS 2
-
-#ifndef INCLUDED_volk_32f_acos_32f_a_H
-#define INCLUDED_volk_32f_acos_32f_a_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes arccosine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arccosine is to be computed
-*/
-static inline void volk_32f_acos_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-  float* bPtr = bVector;
-  const float* aPtr = aVector;
-
-  unsigned int number = 0;
-  unsigned int quarterPoints = num_points / 4;
-  int i, j;
-
-  __m128 aVal, d, pi, pio2, x, y, z, arccosine;
-  __m128 fzeroes, fones, ftwos, ffours, condition;
-
-  pi = _mm_set1_ps(3.14159265358979323846);
-  pio2 = _mm_set1_ps(3.14159265358979323846/2);
-  fzeroes = _mm_setzero_ps();
-  fones = _mm_set1_ps(1.0);
-  ftwos = _mm_set1_ps(2.0);
-  ffours = _mm_set1_ps(4.0);
-
-  for(;number < quarterPoints; number++){
-    aVal = _mm_load_ps(aPtr);
-    d = aVal;
-    aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), 
_mm_sub_ps(fones, aVal))), aVal);
-    z = aVal;
-    condition = _mm_cmplt_ps(z, fzeroes);
-    z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
-    x = z;
-    condition = _mm_cmplt_ps(z, fones);
-    x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
-
-    for(i = 0; i < 2; i++)
-      x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
-    x = _mm_div_ps(fones, x);
-    y = fzeroes;
-    for(j = ACOS_TERMS - 1; j >=0 ; j--)
-      y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
-
-    y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
-    condition = _mm_cmpgt_ps(z, fones);
-
-    y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
-    arccosine = y;
-    condition = _mm_cmplt_ps(aVal, fzeroes);
-    arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine, ftwos), 
condition));
-    condition = _mm_cmplt_ps(d, fzeroes);
-    arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
-
-    _mm_store_ps(bPtr, arccosine);
-    aPtr += 4;
-    bPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++){
-    *bPtr++ = acos(*aPtr++);
-  }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#endif /* INCLUDED_volk_32f_acos_32f_a_H */
-
-
-#ifndef INCLUDED_volk_32f_acos_32f_u_H
-#define INCLUDED_volk_32f_acos_32f_u_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes arccosine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arccosine is to be computed
-*/
-static inline void volk_32f_acos_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-  float* bPtr = bVector;
-  const float* aPtr = aVector;
-
-  unsigned int number = 0;
-  unsigned int quarterPoints = num_points / 4;
-  int i, j;
-
-  __m128 aVal, d, pi, pio2, x, y, z, arccosine;
-  __m128 fzeroes, fones, ftwos, ffours, condition;
-
-  pi = _mm_set1_ps(3.14159265358979323846);
-  pio2 = _mm_set1_ps(3.14159265358979323846/2);
-  fzeroes = _mm_setzero_ps();
-  fones = _mm_set1_ps(1.0);
-  ftwos = _mm_set1_ps(2.0);
-  ffours = _mm_set1_ps(4.0);
-
-  for(;number < quarterPoints; number++){
-    aVal = _mm_loadu_ps(aPtr);
-    d = aVal;
-    aVal = _mm_div_ps(_mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, aVal), 
_mm_sub_ps(fones, aVal))), aVal);
-    z = aVal;
-    condition = _mm_cmplt_ps(z, fzeroes);
-    z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
-    x = z;
-    condition = _mm_cmplt_ps(z, fones);
-    x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
-
-    for(i = 0; i < 2; i++)
-      x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
-    x = _mm_div_ps(fones, x);
-    y = fzeroes;
-
-    for(j = ACOS_TERMS - 1; j >=0 ; j--)
-      y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
-
-    y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
-    condition = _mm_cmpgt_ps(z, fones);
-
-    y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
-    arccosine = y;
-    condition = _mm_cmplt_ps(aVal, fzeroes);
-    arccosine = _mm_sub_ps(arccosine, _mm_and_ps(_mm_mul_ps(arccosine, ftwos), 
condition));
-    condition = _mm_cmplt_ps(d, fzeroes);
-    arccosine = _mm_add_ps(arccosine, _mm_and_ps(pi, condition));
-
-    _mm_storeu_ps(bPtr, arccosine);
-    aPtr += 4;
-    bPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++){
-    *bPtr++ = acos(*aPtr++);
-  }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes arccosine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arccosine is to be computed
-*/
-static inline void volk_32f_acos_32f_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-  float* bPtr = bVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *bPtr++ = acos(*aPtr++);
-  }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32f_acos_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_asin_32f.h 
b/volk/kernels/volk/volk_32f_asin_32f.h
deleted file mode 100644
index 7b80de7..0000000
--- a/volk/kernels/volk/volk_32f_asin_32f.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-
-/* This is the number of terms of Taylor series to evaluate, increase this for 
more accuracy*/
-#define ASIN_TERMS 2
-
-#ifndef INCLUDED_volk_32f_asin_32f_a_H
-#define INCLUDED_volk_32f_asin_32f_a_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes arcsine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arcsine is to be computed
-*/
-static inline void volk_32f_asin_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    int i, j;
-
-    __m128 aVal, pio2, x, y, z, arcsine;
-    __m128 fzeroes, fones, ftwos, ffours, condition;
-
-    pio2 = _mm_set1_ps(3.14159265358979323846/2);
-    fzeroes = _mm_setzero_ps();
-    fones = _mm_set1_ps(1.0);
-    ftwos = _mm_set1_ps(2.0);
-    ffours = _mm_set1_ps(4.0);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_load_ps(aPtr);
-        aVal = _mm_div_ps(aVal, _mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, 
aVal), _mm_sub_ps(fones, aVal))));
-        z = aVal;
-        condition = _mm_cmplt_ps(z, fzeroes);
-        z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
-        x = z;
-        condition = _mm_cmplt_ps(z, fones);
-        x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
-
-        for(i = 0; i < 2; i++){
-            x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, 
x))));
-        }
-        x = _mm_div_ps(fones, x);
-        y = fzeroes;
-        for(j = ASIN_TERMS - 1; j >=0 ; j--){
-            y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
-        }
-
-        y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
-        condition = _mm_cmpgt_ps(z, fones);
-
-        y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
-        arcsine = y;
-        condition = _mm_cmplt_ps(aVal, fzeroes);
-        arcsine = _mm_sub_ps(arcsine, _mm_and_ps(_mm_mul_ps(arcsine, ftwos), 
condition));
-
-        _mm_store_ps(bPtr, arcsine);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = asin(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#endif /* INCLUDED_volk_32f_asin_32f_a_H */
-
-#ifndef INCLUDED_volk_32f_asin_32f_u_H
-#define INCLUDED_volk_32f_asin_32f_u_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes arcsine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arcsine is to be computed
-*/
-static inline void volk_32f_asin_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-        unsigned int quarterPoints = num_points / 4;
-    int i, j;
-
-    __m128 aVal, pio2, x, y, z, arcsine;
-    __m128 fzeroes, fones, ftwos, ffours, condition;
-
-    pio2 = _mm_set1_ps(3.14159265358979323846/2);
-    fzeroes = _mm_setzero_ps();
-    fones = _mm_set1_ps(1.0);
-    ftwos = _mm_set1_ps(2.0);
-    ffours = _mm_set1_ps(4.0);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_loadu_ps(aPtr);
-        aVal = _mm_div_ps(aVal, _mm_sqrt_ps(_mm_mul_ps(_mm_add_ps(fones, 
aVal), _mm_sub_ps(fones, aVal))));
-        z = aVal;
-        condition = _mm_cmplt_ps(z, fzeroes);
-        z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
-        x = z;
-        condition = _mm_cmplt_ps(z, fones);
-        x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
-
-        for(i = 0; i < 2; i++){
-            x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, 
x))));
-        }
-        x = _mm_div_ps(fones, x);
-        y = fzeroes;
-        for(j = ASIN_TERMS - 1; j >=0 ; j--){
-            y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
-        }
-
-        y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
-        condition = _mm_cmpgt_ps(z, fones);
-
-        y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
-        arcsine = y;
-        condition = _mm_cmplt_ps(aVal, fzeroes);
-        arcsine = _mm_sub_ps(arcsine, _mm_and_ps(_mm_mul_ps(arcsine, ftwos), 
condition));
-
-        _mm_storeu_ps(bPtr, arcsine);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-       *bPtr++ = asin(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for unaligned */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes arcsine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arcsine is to be computed
-*/
-static inline void volk_32f_asin_32f_u_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-        *bPtr++ = asin(*aPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32f_asin_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_atan_32f.h 
b/volk/kernels/volk/volk_32f_atan_32f.h
deleted file mode 100644
index 60742bb..0000000
--- a/volk/kernels/volk/volk_32f_atan_32f.h
+++ /dev/null
@@ -1,183 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-
-/* This is the number of terms of Taylor series to evaluate, increase this for 
more accuracy*/
-#define TERMS 2
-
-#ifndef INCLUDED_volk_32f_atan_32f_a_H
-#define INCLUDED_volk_32f_atan_32f_a_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes arctangent of input vector and stores results in output 
vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arctangent is to be computed
-*/
-static inline void volk_32f_atan_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    int i, j;
-
-    __m128 aVal, pio2, x, y, z, arctangent;
-    __m128 fzeroes, fones, ftwos, ffours, condition;
-
-    pio2 = _mm_set1_ps(3.14159265358979323846/2);
-    fzeroes = _mm_setzero_ps();
-    fones = _mm_set1_ps(1.0);
-    ftwos = _mm_set1_ps(2.0);
-    ffours = _mm_set1_ps(4.0);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_load_ps(aPtr);
-        z = aVal;
-        condition = _mm_cmplt_ps(z, fzeroes);
-        z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
-        x = z;
-        condition = _mm_cmplt_ps(z, fones);
-        x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
-
-        for(i = 0; i < 2; i++){
-            x = _mm_add_ps(x, _mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, 
x))));
-        }
-        x = _mm_div_ps(fones, x);
-        y = fzeroes;
-        for(j = TERMS - 1; j >=0 ; j--){
-            y = _mm_add_ps(_mm_mul_ps(y, _mm_mul_ps(x, x)), 
_mm_set1_ps(pow(-1,j)/(2*j+1)));
-        }
-
-        y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
-        condition = _mm_cmpgt_ps(z, fones);
-
-        y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
-        arctangent = y;
-        condition = _mm_cmplt_ps(aVal, fzeroes);
-        arctangent = _mm_sub_ps(arctangent, _mm_and_ps(_mm_mul_ps(arctangent, 
ftwos), condition));
-
-        _mm_store_ps(bPtr, arctangent);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = atan(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#endif /* INCLUDED_volk_32f_atan_32f_a_H */
-
-#ifndef INCLUDED_volk_32f_atan_32f_u_H
-#define INCLUDED_volk_32f_atan_32f_u_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes arctangent of input vector and stores results in output 
vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arctangent is to be computed
-*/
-static inline void volk_32f_atan_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    int i, j;
-
-    __m128 aVal, pio2, x, y, z, arctangent;
-    __m128 fzeroes, fones, ftwos, ffours, condition;
-
-    pio2 = _mm_set1_ps(3.14159265358979323846/2);
-    fzeroes = _mm_setzero_ps();
-    fones = _mm_set1_ps(1.0);
-    ftwos = _mm_set1_ps(2.0);
-    ffours = _mm_set1_ps(4.0);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_loadu_ps(aPtr);
-        z = aVal;
-        condition = _mm_cmplt_ps(z, fzeroes);
-        z = _mm_sub_ps(z, _mm_and_ps(_mm_mul_ps(z, ftwos), condition));
-        x = z;
-        condition = _mm_cmplt_ps(z, fones);
-        x = _mm_add_ps(x, _mm_and_ps(_mm_sub_ps(_mm_div_ps(fones, z), z), 
condition));
-
-        for(i = 0; i < 2; i++)  x = _mm_add_ps(x, 
_mm_sqrt_ps(_mm_add_ps(fones, _mm_mul_ps(x, x))));
-        x = _mm_div_ps(fones, x);
-        y = fzeroes;
-        for(j = TERMS - 1; j >= 0; j--) y = _mm_add_ps(_mm_mul_ps(y, 
_mm_mul_ps(x, x)), _mm_set1_ps(pow(-1,j)/(2*j+1)));
-
-        y = _mm_mul_ps(y, _mm_mul_ps(x, ffours));
-        condition = _mm_cmpgt_ps(z, fones);
-
-        y = _mm_add_ps(y, _mm_and_ps(_mm_sub_ps(pio2, _mm_mul_ps(y, ftwos)), 
condition));
-        arctangent = y;
-        condition = _mm_cmplt_ps(aVal, fzeroes);
-        arctangent = _mm_sub_ps(arctangent, _mm_and_ps(_mm_mul_ps(arctangent, 
ftwos), condition));
-
-        _mm_storeu_ps(bPtr, arctangent);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = atan(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for unaligned */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes arctangent of input vector and stores results in output 
vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which arctangent is to be computed
-*/
-static inline void volk_32f_atan_32f_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-        *bPtr++ = atan(*aPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32f_atan_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_binary_slicer_32i.h 
b/volk/kernels/volk/volk_32f_binary_slicer_32i.h
deleted file mode 100644
index 41f5914..0000000
--- a/volk/kernels/volk/volk_32f_binary_slicer_32i.h
+++ /dev/null
@@ -1,259 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_binary_slicer_32i_H
-#define INCLUDED_volk_32f_binary_slicer_32i_H
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The int output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void volk_32f_binary_slicer_32i_generic(int* cVector, const 
float* aVector, unsigned int num_points){
-    int* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-        if( *aPtr++ >= 0) {
-            *cPtr++ = 1;
-        }
-        else {
-            *cPtr++ = 0;
-        }
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The int output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void volk_32f_binary_slicer_32i_generic_branchless(int* cVector, 
const float* aVector, unsigned int num_points){
-    int* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-        *cPtr++ = (*aPtr++ >= 0);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The int output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void volk_32f_binary_slicer_32i_a_sse2(int* cVector, const 
float* aVector, unsigned int num_points){
-    int* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    unsigned int quarter_points = num_points / 4;
-    __m128 a_val, res_f;
-    __m128i res_i, binary_i;
-    __m128 zero_val;
-    zero_val = _mm_set1_ps (0.0f);
-
-    for(number = 0; number < quarter_points; number++){
-        a_val = _mm_load_ps(aPtr);
-
-        res_f = _mm_cmpge_ps (a_val, zero_val);
-        res_i = _mm_cvtps_epi32 (res_f);
-        binary_i = _mm_srli_epi32 (res_i, 31);
-
-
-        _mm_store_si128((__m128i*)cPtr, binary_i);
-
-
-        cPtr += 4;
-        aPtr += 4;
-    }
-
-    for(number = quarter_points * 4; number < num_points; number++){
-        if( *aPtr++ >= 0) {
-            *cPtr++ = 1;
-        }
-        else {
-            *cPtr++ = 0;
-        }
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The int output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void volk_32f_binary_slicer_32i_a_avx(int* cVector, const float* 
aVector, unsigned int num_points){
-    int* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    unsigned int quarter_points = num_points / 8;
-    __m256 a_val, res_f, binary_f;
-    __m256i binary_i;
-    __m256 zero_val, one_val;
-    zero_val = _mm256_set1_ps (0.0f);
-    one_val = _mm256_set1_ps (1.0f);
-
-    for(number = 0; number < quarter_points; number++){
-        a_val = _mm256_load_ps(aPtr);
-
-        res_f = _mm256_cmp_ps (a_val, zero_val, 13);
-        binary_f = _mm256_and_ps (res_f, one_val);
-        binary_i = _mm256_cvtps_epi32(binary_f);
-
-
-
-        _mm256_store_si256((__m256i *)cPtr, binary_i);
-
-
-        cPtr += 8;
-        aPtr += 8;
-    }
-
-    for(number = quarter_points * 8; number < num_points; number++){
-        if( *aPtr++ >= 0) {
-            *cPtr++ = 1;
-        }
-        else {
-            *cPtr++ = 0;
-        }
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The int output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void volk_32f_binary_slicer_32i_u_sse2(int* cVector, const 
float* aVector, unsigned int num_points){
-    int* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    unsigned int quarter_points = num_points / 4;
-    __m128 a_val, res_f;
-    __m128i res_i, binary_i;
-    __m128 zero_val;
-    zero_val = _mm_set1_ps (0.0f);
-
-    for(number = 0; number < quarter_points; number++){
-        a_val = _mm_loadu_ps(aPtr);
-
-        res_f = _mm_cmpge_ps (a_val, zero_val);
-        res_i = _mm_cvtps_epi32 (res_f);
-        binary_i = _mm_srli_epi32 (res_i, 31);
-
-
-        _mm_storeu_si128((__m128i*)cPtr, binary_i);
-
-
-        cPtr += 4;
-        aPtr += 4;
-    }
-
-    for(number = quarter_points * 4; number < num_points; number++){
-        if( *aPtr++ >= 0) {
-            *cPtr++ = 1;
-        }
-        else {
-            *cPtr++ = 0;
-        }
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The int output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void volk_32f_binary_slicer_32i_u_avx(int* cVector, const float* 
aVector, unsigned int num_points){
-    int* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    unsigned int quarter_points = num_points / 8;
-    __m256 a_val, res_f, binary_f;
-    __m256i binary_i;
-    __m256 zero_val, one_val;
-    zero_val = _mm256_set1_ps (0.0f);
-    one_val = _mm256_set1_ps (1.0f);
-
-    for(number = 0; number < quarter_points; number++){
-        a_val = _mm256_loadu_ps(aPtr);
-
-        res_f = _mm256_cmp_ps (a_val, zero_val, 13);
-        binary_f = _mm256_and_ps (res_f, one_val);
-        binary_i = _mm256_cvtps_epi32(binary_f);
-
-
-
-        _mm256_storeu_si256((__m256i*)cPtr, binary_i);
-
-
-        cPtr += 8;
-        aPtr += 8;
-    }
-
-    for(number = quarter_points * 8; number < num_points; number++){
-        if( *aPtr++ >= 0) {
-            *cPtr++ = 1;
-        }
-        else {
-            *cPtr++ = 0;
-        }
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-
-
-#endif /* INCLUDED_volk_32f_binary_slicer_32i_H */
diff --git a/volk/kernels/volk/volk_32f_binary_slicer_8i.h 
b/volk/kernels/volk/volk_32f_binary_slicer_8i.h
deleted file mode 100644
index ae4420b..0000000
--- a/volk/kernels/volk/volk_32f_binary_slicer_8i.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_binary_slicer_8i_H
-#define INCLUDED_volk_32f_binary_slicer_8i_H
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The char (int8_t) output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void
-volk_32f_binary_slicer_8i_generic(int8_t* cVector, const float* aVector,
-                                  unsigned int num_points)
-{
-  int8_t* cPtr = cVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++) {
-    if(*aPtr++ >= 0) {
-      *cPtr++ = 1;
-    }
-    else {
-      *cPtr++ = 0;
-    }
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The char (int8_t) output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void
-volk_32f_binary_slicer_8i_generic_branchless(int8_t* cVector, const float* 
aVector,
-                                             unsigned int num_points)
-{
-  int8_t* cPtr = cVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *cPtr++ = (*aPtr++ >= 0);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The char (int8_t) output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void
-volk_32f_binary_slicer_8i_a_sse2(int8_t* cVector, const float* aVector,
-                                 unsigned int num_points)
-{
-  int8_t* cPtr = cVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
-
-  unsigned int n16points = num_points / 16;
-  __m128 a0_val, a1_val, a2_val, a3_val;
-  __m128 res0_f, res1_f, res2_f, res3_f;
-  __m128i res0_i, res1_i, res2_i, res3_i;
-  __m128 zero_val;
-  zero_val = _mm_set1_ps(0.0f);
-
-  for(number = 0; number < n16points; number++) {
-    a0_val = _mm_load_ps(aPtr);
-    a1_val = _mm_load_ps(aPtr+4);
-    a2_val = _mm_load_ps(aPtr+8);
-    a3_val = _mm_load_ps(aPtr+12);
-
-    // compare >= 0; return float
-    res0_f = _mm_cmpge_ps(a0_val, zero_val);
-    res1_f = _mm_cmpge_ps(a1_val, zero_val);
-    res2_f = _mm_cmpge_ps(a2_val, zero_val);
-    res3_f = _mm_cmpge_ps(a3_val, zero_val);
-
-    // convert to 32i and >> 31
-    res0_i = _mm_srli_epi32(_mm_cvtps_epi32(res0_f), 31);
-    res1_i = _mm_srli_epi32(_mm_cvtps_epi32(res1_f), 31);
-    res2_i = _mm_srli_epi32(_mm_cvtps_epi32(res2_f), 31);
-    res3_i = _mm_srli_epi32(_mm_cvtps_epi32(res3_f), 31);
-
-    // pack into 16-bit results
-    res0_i = _mm_packs_epi32(res0_i, res1_i);
-    res2_i = _mm_packs_epi32(res2_i, res3_i);
-
-    // pack into 8-bit results
-    res0_i = _mm_packs_epi16(res0_i, res2_i);
-
-    _mm_store_si128((__m128i*)cPtr, res0_i);
-
-    cPtr += 16;
-    aPtr += 16;
-  }
-
-  for(number = n16points * 16; number < num_points; number++) {
-    if( *aPtr++ >= 0) {
-      *cPtr++ = 1;
-    }
-    else {
-      *cPtr++ = 0;
-    }
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The char (int8_t) output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void
-volk_32f_binary_slicer_8i_u_sse2(int8_t* cVector, const float* aVector,
-                                  unsigned int num_points)
-{
-  int8_t* cPtr = cVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
-
-  unsigned int n16points = num_points / 16;
-  __m128 a0_val, a1_val, a2_val, a3_val;
-  __m128 res0_f, res1_f, res2_f, res3_f;
-  __m128i res0_i, res1_i, res2_i, res3_i;
-  __m128 zero_val;
-  zero_val = _mm_set1_ps (0.0f);
-
-  for(number = 0; number < n16points; number++) {
-    a0_val = _mm_loadu_ps(aPtr);
-    a1_val = _mm_loadu_ps(aPtr+4);
-    a2_val = _mm_loadu_ps(aPtr+8);
-    a3_val = _mm_loadu_ps(aPtr+12);
-
-    // compare >= 0; return float
-    res0_f = _mm_cmpge_ps(a0_val, zero_val);
-    res1_f = _mm_cmpge_ps(a1_val, zero_val);
-    res2_f = _mm_cmpge_ps(a2_val, zero_val);
-    res3_f = _mm_cmpge_ps(a3_val, zero_val);
-
-    // convert to 32i and >> 31
-    res0_i = _mm_srli_epi32(_mm_cvtps_epi32(res0_f), 31);
-    res1_i = _mm_srli_epi32(_mm_cvtps_epi32(res1_f), 31);
-    res2_i = _mm_srli_epi32(_mm_cvtps_epi32(res2_f), 31);
-    res3_i = _mm_srli_epi32(_mm_cvtps_epi32(res3_f), 31);
-
-    // pack into 16-bit results
-    res0_i = _mm_packs_epi32(res0_i, res1_i);
-    res2_i = _mm_packs_epi32(res2_i, res3_i);
-
-    // pack into 8-bit results
-    res0_i = _mm_packs_epi16(res0_i, res2_i);
-
-    _mm_storeu_si128((__m128i*)cPtr, res0_i);
-
-    cPtr += 16;
-    aPtr += 16;
-  }
-
-  for(number = n16points * 16; number < num_points; number++) {
-    if( *aPtr++ >= 0) {
-      *cPtr++ = 1;
-    }
-    else {
-      *cPtr++ = 0;
-    }
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Returns integer 1 if float input is greater than or equal to 0, 1 
otherwise
-  \param cVector The char (int8_t) output (either 0 or 1)
-  \param aVector The float input
-  \param num_points The number of values in aVector and stored into cVector
-*/
-static inline void
-volk_32f_binary_slicer_8i_neon(int8_t* cVector, const float* aVector,
-                                  unsigned int num_points)
-{
-  int8_t* cPtr = cVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
-  unsigned int n16points = num_points / 16;
-
-  float32x4x2_t input_val0, input_val1;
-  float32x4_t zero_val;
-  uint32x4x2_t res0_u32, res1_u32;
-  uint16x4x2_t res0_u16x4, res1_u16x4;
-  uint16x8x2_t res_u16x8;
-  uint8x8x2_t res_u8;
-  uint8x8_t one;
-
-  zero_val = vdupq_n_f32(0.0);
-  one = vdup_n_u8(0x01);
-  
-  // TODO: this is a good candidate for asm because the vcombines
-  // can be eliminated simply by picking dst registers that are
-  // adjacent.
-  for(number = 0; number < n16points; number++) {
-    input_val0 = vld2q_f32(aPtr);
-    input_val1 = vld2q_f32(aPtr+8);
-
-    // test against 0; return uint32
-    res0_u32.val[0] = vcgeq_f32(input_val0.val[0], zero_val);
-    res0_u32.val[1] = vcgeq_f32(input_val0.val[1], zero_val);
-    res1_u32.val[0] = vcgeq_f32(input_val1.val[0], zero_val);
-    res1_u32.val[1] = vcgeq_f32(input_val1.val[1], zero_val);
-
-    // narrow uint32 -> uint16 followed by combine to 8-element vectors
-    res0_u16x4.val[0] = vmovn_u32(res0_u32.val[0]);
-    res0_u16x4.val[1] = vmovn_u32(res0_u32.val[1]);
-    res1_u16x4.val[0] = vmovn_u32(res1_u32.val[0]);
-    res1_u16x4.val[1] = vmovn_u32(res1_u32.val[1]);
-
-    res_u16x8.val[0] = vcombine_u16(res0_u16x4.val[0], res1_u16x4.val[0]);
-    res_u16x8.val[1] = vcombine_u16(res0_u16x4.val[1], res1_u16x4.val[1]);
-    
-    // narrow uint16x8 -> uint8x8
-    res_u8.val[0] = vmovn_u16(res_u16x8.val[0]);
-    res_u8.val[1] = vmovn_u16(res_u16x8.val[1]);
-    // we *could* load twice as much data and do another vcombine here
-    // to get a uint8x16x2 vector, still only do 2 vandqs and a single store
-    // but that turns out to be ~16% slower than this version on zc702
-    // it's possible register contention in GCC scheduler slows it down
-    // and a hand-written asm with quad-word u8 registers is much faster.
-
-    res_u8.val[0] = vand_u8(one, res_u8.val[0]);
-    res_u8.val[1] = vand_u8(one, res_u8.val[1]);
-
-    vst2_u8((unsigned char*)cPtr, res_u8);
-    cPtr += 16;
-    aPtr += 16;
-
-  }
-
-  for(number = n16points * 16; number < num_points; number++) {
-    if(*aPtr++ >= 0) {
-      *cPtr++ = 1;
-    }
-    else {
-      *cPtr++ = 0;
-    }
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-
-#endif /* INCLUDED_volk_32f_binary_slicer_8i_H */
diff --git a/volk/kernels/volk/volk_32f_convert_64f.h 
b/volk/kernels/volk/volk_32f_convert_64f.h
deleted file mode 100644
index 96a411c..0000000
--- a/volk/kernels/volk/volk_32f_convert_64f.h
+++ /dev/null
@@ -1,234 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_convert_64f_u_H
-#define INCLUDED_volk_32f_convert_64f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Converts the float values into double values
-    \param dVector The converted double vector values
-    \param fVector The float vector values to be converted
-    \param num_points The number of points in the two vectors to be converted
-  */
-
-static inline void volk_32f_convert_64f_u_avx(double* outputVector, const 
float* inputVector, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  double* outputVectorPtr = outputVector;
-  __m256d ret;
-  __m128 inputVal;
-
-  for(;number < quarterPoints; number++){
-    inputVal = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    ret = _mm256_cvtps_pd(inputVal);
-    _mm256_storeu_pd(outputVectorPtr, ret);
-
-    outputVectorPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    outputVector[number] = (double)(inputVector[number]);
-  }
-}
-
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Converts the float values into double values
-    \param dVector The converted double vector values
-    \param fVector The float vector values to be converted
-    \param num_points The number of points in the two vectors to be converted
-  */
-static inline void volk_32f_convert_64f_u_sse2(double* outputVector, const 
float* inputVector, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  double* outputVectorPtr = outputVector;
-  __m128d ret;
-  __m128 inputVal;
-
-  for(;number < quarterPoints; number++){
-    inputVal = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    ret = _mm_cvtps_pd(inputVal);
-
-    _mm_storeu_pd(outputVectorPtr, ret);
-    outputVectorPtr += 2;
-
-    inputVal = _mm_movehl_ps(inputVal, inputVal);
-
-    ret = _mm_cvtps_pd(inputVal);
-
-    _mm_storeu_pd(outputVectorPtr, ret);
-    outputVectorPtr += 2;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    outputVector[number] = (double)(inputVector[number]);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Converts the float values into double values
-  \param dVector The converted double vector values
-  \param fVector The float vector values to be converted
-  \param num_points The number of points in the two vectors to be converted
-*/
-static inline void volk_32f_convert_64f_generic(double* outputVector, const 
float* inputVector, unsigned int num_points){
-  double* outputVectorPtr = outputVector;
-  const float* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((double)(*inputVectorPtr++));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_convert_64f_u_H */
-
-
-#ifndef INCLUDED_volk_32f_convert_64f_a_H
-#define INCLUDED_volk_32f_convert_64f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Converts the float values into double values
-    \param dVector The converted double vector values
-    \param fVector The float vector values to be converted
-    \param num_points The number of points in the two vectors to be converted
-  */
-static inline void volk_32f_convert_64f_a_avx(double* outputVector, const 
float* inputVector, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  double* outputVectorPtr = outputVector;
-  __m256d ret;
-  __m128 inputVal;
-
-  for(;number < quarterPoints; number++){
-    inputVal = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    ret = _mm256_cvtps_pd(inputVal);
-    _mm256_store_pd(outputVectorPtr, ret);
-
-    outputVectorPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    outputVector[number] = (double)(inputVector[number]);
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Converts the float values into double values
-    \param dVector The converted double vector values
-    \param fVector The float vector values to be converted
-    \param num_points The number of points in the two vectors to be converted
-  */
-static inline void volk_32f_convert_64f_a_sse2(double* outputVector, const 
float* inputVector, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  double* outputVectorPtr = outputVector;
-  __m128d ret;
-  __m128 inputVal;
-
-  for(;number < quarterPoints; number++){
-    inputVal = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    ret = _mm_cvtps_pd(inputVal);
-
-    _mm_store_pd(outputVectorPtr, ret);
-    outputVectorPtr += 2;
-
-    inputVal = _mm_movehl_ps(inputVal, inputVal);
-
-    ret = _mm_cvtps_pd(inputVal);
-
-    _mm_store_pd(outputVectorPtr, ret);
-    outputVectorPtr += 2;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    outputVector[number] = (double)(inputVector[number]);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Converts the float values into double values
-  \param dVector The converted double vector values
-  \param fVector The float vector values to be converted
-  \param num_points The number of points in the two vectors to be converted
-*/
-static inline void volk_32f_convert_64f_a_generic(double* outputVector, const 
float* inputVector, unsigned int num_points){
-  double* outputVectorPtr = outputVector;
-  const float* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((double)(*inputVectorPtr++));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_convert_64f_a_H */
diff --git a/volk/kernels/volk/volk_32f_cos_32f.h 
b/volk/kernels/volk/volk_32f_cos_32f.h
deleted file mode 100644
index 49c5658..0000000
--- a/volk/kernels/volk/volk_32f_cos_32f.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-
-#ifndef INCLUDED_volk_32f_cos_32f_a_H
-#define INCLUDED_volk_32f_cos_32f_a_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes cosine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which cosine is to be computed
-*/
-static inline void volk_32f_cos_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    unsigned int i = 0;
-
-    __m128 aVal, s, m4pi, pio4A, pio4B, cp1, cp2, cp3, cp4, cp5, ffours, 
ftwos, fones, fzeroes;
-    __m128 sine, cosine, condition1, condition3;
-    __m128i q, r, ones, twos, fours;
-
-    m4pi = _mm_set1_ps(1.273239545);
-    pio4A = _mm_set1_ps(0.78515625);
-    pio4B = _mm_set1_ps(0.241876e-3);
-    ffours = _mm_set1_ps(4.0);
-    ftwos = _mm_set1_ps(2.0);
-    fones = _mm_set1_ps(1.0);
-    fzeroes = _mm_setzero_ps();
-    ones = _mm_set1_epi32(1);
-    twos = _mm_set1_epi32(2);
-    fours = _mm_set1_epi32(4);
-
-    cp1 = _mm_set1_ps(1.0);
-    cp2 = _mm_set1_ps(0.83333333e-1);
-    cp3 = _mm_set1_ps(0.2777778e-2);
-    cp4 = _mm_set1_ps(0.49603e-4);
-    cp5 = _mm_set1_ps(0.551e-6);
-
-    for(;number < quarterPoints; number++){
-
-    aVal = _mm_load_ps(aPtr);
-    s = _mm_sub_ps(aVal, _mm_and_ps(_mm_mul_ps(aVal, ftwos), 
_mm_cmplt_ps(aVal, fzeroes)));
-    q = _mm_cvtps_epi32(_mm_mul_ps(s, m4pi));
-    r = _mm_add_epi32(q, _mm_and_si128(q, ones));
-
-    s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4A));
-    s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4B));
-
-    s = _mm_div_ps(s, _mm_set1_ps(8.0));    // The constant is 2^N, for 3 
times argument reduction
-    s = _mm_mul_ps(s, s);
-    // Evaluate Taylor series
-    s = 
_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(s,
 cp5), cp4), s), cp3), s), cp2), s), cp1), s);
-
-    for(i = 0; i < 3; i++)  s = _mm_mul_ps(s, _mm_sub_ps(ffours, s));
-    s = _mm_div_ps(s, ftwos);
-
-    sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s));
-    cosine = _mm_sub_ps(fones, s);
-
-    condition1 = _mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, 
ones), twos)), fzeroes);
-
-    condition3 = _mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, 
twos), fours)), fzeroes);
-
-    cosine = _mm_add_ps(cosine, _mm_and_ps(_mm_sub_ps(sine, cosine), 
condition1));
-    cosine = _mm_sub_ps(cosine, _mm_and_ps(_mm_mul_ps(cosine, 
_mm_set1_ps(2.0f)), condition3));
-    _mm_store_ps(bPtr, cosine);
-    aPtr += 4;
-    bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-       *bPtr++ = cos(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#endif /* INCLUDED_volk_32f_cos_32f_a_H */
-
-#ifndef INCLUDED_volk_32f_cos_32f_u_H
-#define INCLUDED_volk_32f_cos_32f_u_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes cosine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which cosine is to be computed
-*/
-static inline void volk_32f_cos_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-        unsigned int quarterPoints = num_points / 4;
-    unsigned int i = 0;
-
-    __m128 aVal, s, m4pi, pio4A, pio4B, cp1, cp2, cp3, cp4, cp5, ffours, 
ftwos, fones, fzeroes;
-    __m128 sine, cosine, condition1, condition3;
-    __m128i q, r, ones, twos, fours;
-
-    m4pi = _mm_set1_ps(1.273239545);
-    pio4A = _mm_set1_ps(0.78515625);
-    pio4B = _mm_set1_ps(0.241876e-3);
-    ffours = _mm_set1_ps(4.0);
-    ftwos = _mm_set1_ps(2.0);
-    fones = _mm_set1_ps(1.0);
-    fzeroes = _mm_setzero_ps();
-    ones = _mm_set1_epi32(1);
-    twos = _mm_set1_epi32(2);
-    fours = _mm_set1_epi32(4);
-
-    cp1 = _mm_set1_ps(1.0);
-    cp2 = _mm_set1_ps(0.83333333e-1);
-    cp3 = _mm_set1_ps(0.2777778e-2);
-    cp4 = _mm_set1_ps(0.49603e-4);
-    cp5 = _mm_set1_ps(0.551e-6);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_loadu_ps(aPtr);
-        s = _mm_sub_ps(aVal, _mm_and_ps(_mm_mul_ps(aVal, ftwos), 
_mm_cmplt_ps(aVal, fzeroes)));
-        q = _mm_cvtps_epi32(_mm_mul_ps(s, m4pi));
-        r = _mm_add_epi32(q, _mm_and_si128(q, ones));
-
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4A));
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4B));
-
-        s = _mm_div_ps(s, _mm_set1_ps(8.0));    // The constant is 2^N, for 3 
times argument reduction
-        s = _mm_mul_ps(s, s);
-        // Evaluate Taylor series
-        s = 
_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(s,
 cp5), cp4), s), cp3), s), cp2), s), cp1), s);
-
-        for(i = 0; i < 3; i++){
-            s = _mm_mul_ps(s, _mm_sub_ps(ffours, s));
-        }
-        s = _mm_div_ps(s, ftwos);
-
-        sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s));
-        cosine = _mm_sub_ps(fones, s);
-
-        condition1 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, ones), twos)), 
fzeroes);
-
-        condition3 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, twos), fours)), 
fzeroes);
-
-        cosine = _mm_add_ps(cosine, _mm_and_ps(_mm_sub_ps(sine, cosine), 
condition1));
-        cosine = _mm_sub_ps(cosine, _mm_and_ps(_mm_mul_ps(cosine, 
_mm_set1_ps(2.0f)), condition3));
-        _mm_storeu_ps(bPtr, cosine);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = cos(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for unaligned */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes cosine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which cosine is to be computed
-*/
-static inline void volk_32f_cos_32f_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(; number < num_points; number++){
-        *bPtr++ = cos(*aPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32f_cos_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_expfast_32f.h 
b/volk/kernels/volk/volk_32f_expfast_32f.h
deleted file mode 100644
index b64e45c..0000000
--- a/volk/kernels/volk/volk_32f_expfast_32f.h
+++ /dev/null
@@ -1,216 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-
-#define Mln2 0.6931471805f
-#define A 8388608.0f
-#define B 1065353216.0f
-#define C 60801.0f
-
-
-#ifndef INCLUDED_volk_32f_expfast_32f_a_H
-#define INCLUDED_volk_32f_expfast_32f_a_H
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Computes fast exp (max 7% error) of input vector and stores results 
in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which exp is to be computed
-*/
-static inline void volk_32f_expfast_32f_a_avx(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    __m256 aVal, bVal, a, b;
-    __m256i exp;
-    a = _mm256_set1_ps(A/Mln2);
-    b = _mm256_set1_ps(B-C);
-
-    for(;number < eighthPoints; number++){
-        aVal = _mm256_load_ps(aPtr);
-        exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
-        bVal = _mm256_castsi256_ps(exp);
-
-        _mm256_store_ps(bPtr, bVal);
-        aPtr += 8;
-        bPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; number++){
-        *bPtr++ = expf(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_AVX for aligned */
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes fast exp (max 7% error) of input vector and stores results 
in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which exp is to be computed
-*/
-static inline void volk_32f_expfast_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m128 aVal, bVal, a, b;
-    __m128i exp;
-    a = _mm_set1_ps(A/Mln2);
-    b = _mm_set1_ps(B-C);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_load_ps(aPtr);
-        exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
-        bVal = _mm_castsi128_ps(exp);
-
-        _mm_store_ps(bPtr, bVal);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = expf(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#endif /* INCLUDED_volk_32f_expfast_32f_a_H */
-
-#ifndef INCLUDED_volk_32f_expfast_32f_u_H
-#define INCLUDED_volk_32f_expfast_32f_u_H
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Computes fast exp (max 7% error) of input vector and stores results 
in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which exp is to be computed
-*/
-static inline void volk_32f_expfast_32f_u_avx(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    __m256 aVal, bVal, a, b;
-    __m256i exp;
-    a = _mm256_set1_ps(A/Mln2);
-    b = _mm256_set1_ps(B-C);
-
-    for(;number < eighthPoints; number++){
-        aVal = _mm256_loadu_ps(aPtr);
-        exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a,aVal), b));
-        bVal = _mm256_castsi256_ps(exp);
-
-        _mm256_storeu_ps(bPtr, bVal);
-        aPtr += 8;
-        bPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; number++){
-        *bPtr++ = expf(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_AVX for aligned */
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes fast exp (max 7% error) of input vector and stores results 
in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which log is to be computed
-*/
-static inline void volk_32f_expfast_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m128 aVal, bVal, a, b;
-    __m128i exp;
-    a = _mm_set1_ps(A/Mln2);
-    b = _mm_set1_ps(B-C);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_loadu_ps(aPtr);
-        exp = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(a,aVal), b));
-        bVal = _mm_castsi128_ps(exp);
-
-        _mm_storeu_ps(bPtr, bVal);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = expf(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for unaligned */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes fast exp (max 7% error) of input vector and stores results 
in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which log is to be computed
-*/
-static inline void volk_32f_expfast_32f_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-        *bPtr++ = expf(*aPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32f_expfast_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_index_max_16u.h 
b/volk/kernels/volk/volk_32f_index_max_16u.h
deleted file mode 100644
index a5482a6..0000000
--- a/volk/kernels/volk/volk_32f_index_max_16u.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_index_max_16u_a_H
-#define INCLUDED_volk_32f_index_max_16u_a_H
-
-#include <volk/volk_common.h>
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include<smmintrin.h>
-
-static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const 
float* src0, unsigned int num_points) {
-  if(num_points > 0){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* inputPtr = (float*)src0;
-
-    __m128 indexIncrementValues = _mm_set1_ps(4);
-    __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
-
-    float max = src0[0];
-    float index = 0;
-    __m128 maxValues = _mm_set1_ps(max);
-    __m128 maxValuesIndex = _mm_setzero_ps();
-    __m128 compareResults;
-    __m128 currentValues;
-
-    __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
-    __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
-
-    for(;number < quarterPoints; number++){
-
-      currentValues  = _mm_load_ps(inputPtr); inputPtr += 4;
-      currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
-
-      compareResults = _mm_cmpgt_ps(maxValues, currentValues);
-
-      maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, 
compareResults);
-      maxValues      = _mm_blendv_ps(currentValues, maxValues, compareResults);
-    }
-
-    // Calculate the largest value from the remaining 4 points
-    _mm_store_ps(maxValuesBuffer, maxValues);
-    _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
-
-    for(number = 0; number < 4; number++){
-      if(maxValuesBuffer[number] > max){
-       index = maxIndexesBuffer[number];
-       max = maxValuesBuffer[number];
-      }
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      if(src0[number] > max){
-       index = number;
-       max = src0[number];
-      }
-    }
-    target[0] = (unsigned int)index;
-  }
-}
-
-#endif /*LV_HAVE_SSE4_1*/
-
-#ifdef LV_HAVE_SSE
-#include<xmmintrin.h>
-
-static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const 
float* src0, unsigned int num_points) {
-  if(num_points > 0){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* inputPtr = (float*)src0;
-
-    __m128 indexIncrementValues = _mm_set1_ps(4);
-    __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
-
-    float max = src0[0];
-    float index = 0;
-    __m128 maxValues = _mm_set1_ps(max);
-    __m128 maxValuesIndex = _mm_setzero_ps();
-    __m128 compareResults;
-    __m128 currentValues;
-
-    __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
-    __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
-
-    for(;number < quarterPoints; number++){
-
-      currentValues  = _mm_load_ps(inputPtr); inputPtr += 4;
-      currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
-
-      compareResults = _mm_cmpgt_ps(maxValues, currentValues);
-
-      maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , 
_mm_andnot_ps(compareResults, currentIndexes));
-
-      maxValues      = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , 
_mm_andnot_ps(compareResults, currentValues));
-    }
-
-    // Calculate the largest value from the remaining 4 points
-    _mm_store_ps(maxValuesBuffer, maxValues);
-    _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
-
-    for(number = 0; number < 4; number++){
-      if(maxValuesBuffer[number] > max){
-       index = maxIndexesBuffer[number];
-       max = maxValuesBuffer[number];
-      }
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      if(src0[number] > max){
-       index = number;
-       max = src0[number];
-      }
-    }
-    target[0] = (unsigned int)index;
-  }
-}
-
-#endif /*LV_HAVE_SSE*/
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_32f_index_max_16u_generic(unsigned int* target, const 
float* src0, unsigned int num_points) {
-  if(num_points > 0){
-    float max = src0[0];
-    unsigned int index = 0;
-
-    unsigned int i = 1;
-
-    for(; i < num_points; ++i) {
-
-      if(src0[i] > max){
-       index = i;
-       max = src0[i];
-      }
-
-    }
-    target[0] = index;
-  }
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#endif /*INCLUDED_volk_32f_index_max_16u_a_H*/
diff --git a/volk/kernels/volk/volk_32f_invsqrt_32f.h 
b/volk/kernels/volk/volk_32f_invsqrt_32f.h
deleted file mode 100644
index 21ed125..0000000
--- a/volk/kernels/volk/volk_32f_invsqrt_32f.h
+++ /dev/null
@@ -1,196 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
-#define INCLUDED_volk_32f_invsqrt_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
-
-static inline float Q_rsqrt( float number )
-{
-  float x2;
-  const float threehalfs = 1.5F;
-  union f32_to_i32 {
-    int32_t i;
-    float f;
-  } u;
-
-  x2 = number * 0.5F;
-  u.f = number;
-  u.i = 0x5f3759df - ( u.i >> 1 );               // what the fuck?
-  u.f = u.f * ( threehalfs - ( x2 * u.f * u.f ) );   // 1st iteration
-  //u.f  = u.f * ( threehalfs - ( x2 * u.f * u.f ) );   // 2nd iteration, this 
can be removed
-
-  return u.f;
-}
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-\brief Sqrts the two input vectors and store their results in the third vector
-\param cVector The vector where the results will be stored
-\param aVector One of the vectors to be invsqrted
-\param num_points The number of values in aVector and bVector to be invsqrted 
together and stored into cVector
-*/
-static inline void volk_32f_invsqrt_32f_a_avx(float* cVector, const float* 
aVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    __m256 aVal, cVal;
-    for (; number < eighthPoints; number++)
-    {
-        aVal = _mm256_load_ps(aPtr);
-        cVal = _mm256_rsqrt_ps(aVal);
-        _mm256_store_ps(cPtr, cVal);
-        aPtr += 8;
-       cPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; number++)
-      *cPtr++ = Q_rsqrt(*aPtr++);
-
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Sqrts the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be invsqrted
-  \param num_points The number of values in aVector and bVector to be 
invsqrted together and stored into cVector
-*/
-static inline void volk_32f_invsqrt_32f_a_sse(float* cVector, const float* 
aVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-
-    __m128 aVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-
-      cVal = _mm_rsqrt_ps(aVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = Q_rsqrt(*aPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-\brief Sqrts the two input vectors and store their results in the third vector
-\param cVector The vector where the results will be stored
-\param aVector One of the vectors to be invsqrted
-\param num_points The number of values in aVector and bVector to be invsqrted 
together and stored into cVector
-*/
-static inline void volk_32f_invsqrt_32f_neon(float* cVector, const float* 
aVector, unsigned int num_points){
-    unsigned int number;
-    const unsigned int quarter_points = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    float32x4_t a_val, c_val;
-    for (number = 0; number < quarter_points; ++number)
-    {
-        a_val = vld1q_f32(aPtr);
-        c_val = vrsqrteq_f32(a_val);
-        vst1q_f32(cPtr, c_val);
-        aPtr += 4;
-        cPtr += 4;
-    }
-
-    for(number=quarter_points * 4;number < num_points; number++)
-      *cPtr++ = Q_rsqrt(*aPtr++);
-
-}
-#endif /* LV_HAVE_NEON */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Sqrts the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be invsqrted
-  \param num_points The number of values in aVector and bVector to be 
invsqrted together and stored into cVector
-*/
-static inline void volk_32f_invsqrt_32f_generic(float* cVector, const float* 
aVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-    for(number = 0; number < num_points; number++){
-        *cPtr++ = Q_rsqrt(*aPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-\brief Sqrts the two input vectors and store their results in the third vector
-\param cVector The vector where the results will be stored
-\param aVector One of the vectors to be invsqrted
-\param num_points The number of values in aVector and bVector to be invsqrted 
together and stored into cVector
-*/
-static inline void volk_32f_invsqrt_32f_u_avx(float* cVector, const float* 
aVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    __m256 aVal, cVal;
-    for (; number < eighthPoints; number++)
-    {
-        aVal = _mm256_loadu_ps(aPtr);
-        cVal = _mm256_rsqrt_ps(aVal);
-        _mm256_storeu_ps(cPtr, cVal);
-        aPtr += 8;
-       cPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; number++)
-      *cPtr++ = Q_rsqrt(*aPtr++);
-
-}
-#endif /* LV_HAVE_AVX */
-
-
-
-#endif /* INCLUDED_volk_32f_invsqrt_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_log2_32f.h 
b/volk/kernels/volk/volk_32f_log2_32f.h
deleted file mode 100644
index 892eeb1..0000000
--- a/volk/kernels/volk/volk_32f_log2_32f.h
+++ /dev/null
@@ -1,332 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-/*
- * This kernel was adapted from Jose Fonseca's Fast SSE2 log implementation
- * http://jrfonseca.blogspot.in/2008/09/fast-sse2-pow-tables-or-polynomials.htm
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
- * This is the MIT License (MIT)
-
-*/
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <math.h>
-
-#define POLY0(x, c0) _mm_set1_ps(c0)
-#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), 
_mm_set1_ps(c0))
-#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), 
_mm_set1_ps(c0))
-#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), 
x), _mm_set1_ps(c0))
-#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, 
c3, c4), x), _mm_set1_ps(c0))
-#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, 
c2, c3, c4, c5), x), _mm_set1_ps(c0))
-
-#define LOG_POLY_DEGREE 6
-
-
-#ifndef INCLUDED_volk_32f_log2_32f_a_H
-#define INCLUDED_volk_32f_log2_32f_a_H
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes base 2 log of input vector and stores results in output 
vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which log is to be computed
-*/
-static inline void volk_32f_log2_32f_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *bPtr++ = log2(*aPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes base 2 log of input vector and stores results in output 
vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which log is to be computed
-*/
-static inline void volk_32f_log2_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-       float* bPtr = bVector;
-       const float* aPtr = aVector;
-
-       unsigned int number = 0;
-        const unsigned int quarterPoints = num_points / 4;
-
-       __m128 aVal, bVal, mantissa, frac, leadingOne;
-       __m128i bias, exp;
-
-       for(;number < quarterPoints; number++){
-
-       aVal = _mm_load_ps(aPtr);
-       bias = _mm_set1_epi32(127);
-       leadingOne = _mm_set1_ps(1.0f);
-       exp = 
_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(_mm_castps_si128(aVal), 
_mm_set1_epi32(0x7f800000)), 23), bias);
-       bVal = _mm_cvtepi32_ps(exp);
-
-       // Now to extract mantissa
-       frac = _mm_or_ps(leadingOne, _mm_and_ps(aVal, 
_mm_castsi128_ps(_mm_set1_epi32(0x7fffff))));
-
-       #if LOG_POLY_DEGREE == 6
-         mantissa = POLY5( frac, 3.1157899f, -3.3241990f, 2.5988452f, 
-1.2315303f,  3.1821337e-1f, -3.4436006e-2f);
-       #elif LOG_POLY_DEGREE == 5
-         mantissa = POLY4( frac, 2.8882704548164776201f, 
-2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 
0.0596515482674574969533f);
-       #elif LOG_POLY_DEGREE == 4
-         mantissa = POLY3( frac, 2.61761038894603480148f, 
-1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
-       #elif LOG_POLY_DEGREE == 3
-         mantissa = POLY2( frac, 2.28330284476918490682f, 
-1.04913055217340124191f, 0.204446009836232697516f);
-       #else
-       #error
-       #endif
-
-       bVal = _mm_add_ps(bVal, _mm_mul_ps(mantissa, _mm_sub_ps(frac, 
leadingOne)));
-       _mm_store_ps(bPtr, bVal);
-
-
-       aPtr += 4;
-       bPtr += 4;
-       }
-
-       number = quarterPoints * 4;
-       for(;number < num_points; number++){
-          *bPtr++ = log2(*aPtr++);
-       }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-/* these macros allow us to embed logs in other kernels */
-#define VLOG2Q_NEON_PREAMBLE()                                  \
-    int32x4_t one = vdupq_n_s32(0x000800000);                   \
-    /* minimax polynomial */                                    \
-    float32x4_t p0 = vdupq_n_f32(-3.0400402727048585);          \
-    float32x4_t p1 = vdupq_n_f32(6.1129631282966113);           \
-    float32x4_t p2 = vdupq_n_f32(-5.3419892024633207);          \
-    float32x4_t p3 = vdupq_n_f32(3.2865287703753912);           \
-    float32x4_t p4 = vdupq_n_f32(-1.2669182593441635);          \
-    float32x4_t p5 = vdupq_n_f32(0.2751487703421256);           \
-    float32x4_t p6 = vdupq_n_f32(-0.0256910888150985);          \
-    int32x4_t exp_mask = vdupq_n_s32(0x7f800000);               \
-    int32x4_t sig_mask = vdupq_n_s32(0x007fffff);               \
-    int32x4_t exp_bias = vdupq_n_s32(127);
-
-
-#define VLOG2Q_NEON_F32(log2_approx, aval)                              \
-        int32x4_t exponent_i = vandq_s32(aval, exp_mask);               \
-        int32x4_t significand_i = vandq_s32(aval, sig_mask);            \
-        exponent_i = vshrq_n_s32(exponent_i, 23);                       \
-                                                                        \
-        /* extract the exponent and significand                         \
-         we can treat this as fixed point to save ~9% on the            \
-         conversion + float add */                                      \
-        significand_i = vorrq_s32(one, significand_i);                  \
-        float32x4_t significand_f = vcvtq_n_f32_s32(significand_i,23);  \
-        /* debias the exponent and convert to float */                  \
-        exponent_i = vsubq_s32(exponent_i, exp_bias);                   \
-        float32x4_t exponent_f = vcvtq_f32_s32(exponent_i);             \
-                                                                        \
-        /* put the significand through a polynomial fit of log2(x) [1,2]\
-         add the result to the exponent */                              \
-        log2_approx = vaddq_f32(exponent_f, p0); /* p0 */               \
-        float32x4_t tmp1 = vmulq_f32(significand_f, p1); /* p1 * x */   \
-        log2_approx = vaddq_f32(log2_approx, tmp1);                     \
-        float32x4_t sig_2 = vmulq_f32(significand_f, significand_f); /* x^2 */ 
\
-        tmp1 = vmulq_f32(sig_2, p2); /* p2 * x^2 */                     \
-        log2_approx = vaddq_f32(log2_approx, tmp1);                     \
-                                                                        \
-        float32x4_t sig_3 = vmulq_f32(sig_2, significand_f); /* x^3 */  \
-        tmp1 = vmulq_f32(sig_3, p3); /* p3 * x^3 */                     \
-        log2_approx = vaddq_f32(log2_approx, tmp1);                     \
-        float32x4_t sig_4 = vmulq_f32(sig_2, sig_2); /* x^4 */          \
-        tmp1 = vmulq_f32(sig_4, p4); /* p4 * x^4 */                     \
-        log2_approx = vaddq_f32(log2_approx, tmp1);                     \
-        float32x4_t sig_5 = vmulq_f32(sig_3, sig_2); /* x^5 */          \
-        tmp1 = vmulq_f32(sig_5, p5); /* p5 * x^5 */                     \
-        log2_approx = vaddq_f32(log2_approx, tmp1);                     \
-        float32x4_t sig_6 = vmulq_f32(sig_3, sig_3); /* x^6 */          \
-        tmp1 = vmulq_f32(sig_6, p6); /* p6 * x^6 */                     \
-        log2_approx = vaddq_f32(log2_approx, tmp1);
-
-/*!
-  \brief Computes base 2 log of input vector and stores results in output 
vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which log is to be computed
-*/
-static inline void volk_32f_log2_32f_neon(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number;
-    const unsigned int quarterPoints = num_points / 4;
-
-    int32x4_t aval;
-    float32x4_t log2_approx;
-
-    VLOG2Q_NEON_PREAMBLE()
-    // lms
-    //p0 = vdupq_n_f32(-1.649132280361871);
-    //p1 = vdupq_n_f32(1.995047138579499);
-    //p2 = vdupq_n_f32(-0.336914839219728);
-
-    // keep in mind a single precision float is represented as
-    //   (-1)^sign * 2^exp * 1.significand, so the log2 is
-    // log2(2^exp * sig) = exponent + log2(1 + significand/(1<<23)
-    for(number = 0; number < quarterPoints; ++number){
-        // load float in to an int register without conversion
-        aval = vld1q_s32((int*)aPtr);
-
-        VLOG2Q_NEON_F32(log2_approx, aval)
-
-        vst1q_f32(bPtr, log2_approx);
-
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    for(number = quarterPoints * 4; number < num_points; number++){
-       *bPtr++ = log2(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_NEON */
-
-
-#endif /* INCLUDED_volk_32f_log2_32f_a_H */
-
-#ifndef INCLUDED_volk_32f_log2_32f_u_H
-#define INCLUDED_volk_32f_log2_32f_u_H
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes base 2 log of input vector and stores results in output 
vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which log is to be computed
-*/
-static inline void volk_32f_log2_32f_u_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *bPtr++ = log2(*aPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes base 2 log of input vector and stores results in output 
vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which log is to be computed
-*/
-static inline void volk_32f_log2_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-       float* bPtr = bVector;
-       const float* aPtr = aVector;
-
-       unsigned int number = 0;
-        const unsigned int quarterPoints = num_points / 4;
-
-       __m128 aVal, bVal, mantissa, frac, leadingOne;
-       __m128i bias, exp;
-
-       for(;number < quarterPoints; number++){
-
-       aVal = _mm_loadu_ps(aPtr);
-       bias = _mm_set1_epi32(127);
-       leadingOne = _mm_set1_ps(1.0f);
-       exp = 
_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(_mm_castps_si128(aVal), 
_mm_set1_epi32(0x7f800000)), 23), bias);
-       bVal = _mm_cvtepi32_ps(exp);
-
-       // Now to extract mantissa
-       frac = _mm_or_ps(leadingOne, _mm_and_ps(aVal, 
_mm_castsi128_ps(_mm_set1_epi32(0x7fffff))));
-
-       #if LOG_POLY_DEGREE == 6
-         mantissa = POLY5( frac, 3.1157899f, -3.3241990f, 2.5988452f, 
-1.2315303f,  3.1821337e-1f, -3.4436006e-2f);
-       #elif LOG_POLY_DEGREE == 5
-         mantissa = POLY4( frac, 2.8882704548164776201f, 
-2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 
0.0596515482674574969533f);
-       #elif LOG_POLY_DEGREE == 4
-         mantissa = POLY3( frac, 2.61761038894603480148f, 
-1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
-       #elif LOG_POLY_DEGREE == 3
-         mantissa = POLY2( frac, 2.28330284476918490682f, 
-1.04913055217340124191f, 0.204446009836232697516f);
-       #else
-       #error
-       #endif
-
-       bVal = _mm_add_ps(bVal, _mm_mul_ps(mantissa, _mm_sub_ps(frac, 
leadingOne)));
-       _mm_storeu_ps(bPtr, bVal);
-
-
-       aPtr += 4;
-       bPtr += 4;
-       }
-
-       number = quarterPoints * 4;
-       for(;number < num_points; number++){
-          *bPtr++ = log2(*aPtr++);
-       }
-}
-
-#endif /* LV_HAVE_SSE4_1 for unaligned */
-
-#endif /* INCLUDED_volk_32f_log2_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h 
b/volk/kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h
deleted file mode 100644
index f03d80b..0000000
--- a/volk/kernels/volk/volk_32f_s32f_32f_fm_detect_32f.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
-#define INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief performs the FM-detect differentiation on the input vector and stores 
the results in the output vector.
-  \param outputVector The byte-aligned vector where the results will be stored.
-  \param inputVector The byte-aligned input vector containing phase data (must 
be on the interval (-bound,bound] )
-  \param bound The interval that the input phase data is in, which is used to 
modulo the differentiation
-  \param saveValue A pointer to a float which contains the phase value of the 
sample before the first input sample.
-  \param num_noints The number of real values in the input vector.
-*/
-static inline void volk_32f_s32f_32f_fm_detect_32f_a_sse(float* outputVector, 
const float* inputVector, const float bound, float* saveValue, unsigned int 
num_points){
-  if (num_points < 1) {
-    return;
-  }
-  unsigned int number = 1;
-  unsigned int j = 0;
-  // num_points-1 keeps Fedora 7's gcc from crashing...
-  // num_points won't work.  :(
-  const unsigned int quarterPoints = (num_points-1) / 4;
-
-  float* outPtr = outputVector;
-  const float* inPtr = inputVector;
-  __m128 upperBound = _mm_set_ps1(bound);
-  __m128 lowerBound = _mm_set_ps1(-bound);
-  __m128 next3old1;
-  __m128 next4;
-  __m128 boundAdjust;
-  __m128 posBoundAdjust = _mm_set_ps1(-2*bound); // Subtract when we're above.
-  __m128 negBoundAdjust = _mm_set_ps1(2*bound); // Add when we're below.
-  // Do the first 4 by hand since we're going in from the saveValue:
-  *outPtr = *inPtr - *saveValue;
-  if (*outPtr >  bound) *outPtr -= 2*bound;
-  if (*outPtr < -bound) *outPtr += 2*bound;
-  inPtr++;
-  outPtr++;
-  for (j = 1; j < ( (4 < num_points) ? 4 : num_points); j++) {
-    *outPtr = *(inPtr) - *(inPtr-1);
-    if (*outPtr >  bound) *outPtr -= 2*bound;
-    if (*outPtr < -bound) *outPtr += 2*bound;
-    inPtr++;
-    outPtr++;
-  }
-
-  for (; number < quarterPoints; number++) {
-    // Load data
-    next3old1 = _mm_loadu_ps((float*) (inPtr-1));
-    next4 = _mm_load_ps(inPtr);
-    inPtr += 4;
-    // Subtract and store:
-    next3old1 = _mm_sub_ps(next4, next3old1);
-    // Bound:
-    boundAdjust = _mm_cmpgt_ps(next3old1, upperBound);
-    boundAdjust = _mm_and_ps(boundAdjust, posBoundAdjust);
-    next4 = _mm_cmplt_ps(next3old1, lowerBound);
-    next4 = _mm_and_ps(next4, negBoundAdjust);
-    boundAdjust = _mm_or_ps(next4, boundAdjust);
-    // Make sure we're in the bounding interval:
-    next3old1 = _mm_add_ps(next3old1, boundAdjust);
-    _mm_store_ps(outPtr,next3old1); // Store the results back into the output
-    outPtr += 4;
-  }
-
-  for (number = (4 > (quarterPoints*4) ? 4 : (4 * quarterPoints)); number < 
num_points; number++) {
-    *outPtr = *(inPtr) - *(inPtr-1);
-    if (*outPtr >  bound) *outPtr -= 2*bound;
-    if (*outPtr < -bound) *outPtr += 2*bound;
-    inPtr++;
-    outPtr++;
-  }
-
-  *saveValue = inputVector[num_points-1];
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief performs the FM-detect differentiation on the input vector and stores 
the results in the output vector.
-  \param outputVector The byte-aligned vector where the results will be stored.
-  \param inputVector The byte-aligned input vector containing phase data (must 
be on the interval (-bound,bound] )
-  \param bound The interval that the input phase data is in, which is used to 
modulo the differentiation
-  \param saveValue A pointer to a float which contains the phase value of the 
sample before the first input sample.
-  \param num_points The number of real values in the input vector.
-*/
-static inline void volk_32f_s32f_32f_fm_detect_32f_generic(float* 
outputVector, const float* inputVector, const float bound, float* saveValue, 
unsigned int num_points){
-  if (num_points < 1) {
-    return;
-  }
-  unsigned int number = 0;
-  float* outPtr = outputVector;
-  const float* inPtr = inputVector;
-
-  // Do the first 1 by hand since we're going in from the saveValue:
-  *outPtr = *inPtr - *saveValue;
-  if (*outPtr >  bound) *outPtr -= 2*bound;
-  if (*outPtr < -bound) *outPtr += 2*bound;
-  inPtr++;
-  outPtr++;
-
-  for (number = 1; number < num_points; number++) {
-    *outPtr = *(inPtr) - *(inPtr-1);
-    if (*outPtr >  bound) *outPtr -= 2*bound;
-    if (*outPtr < -bound) *outPtr += 2*bound;
-    inPtr++;
-    outPtr++;
-  }
-
-  *saveValue = inputVector[num_points-1];
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_32f_fm_detect_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_calc_spectral_noise_floor_32f.h 
b/volk/kernels/volk/volk_32f_s32f_calc_spectral_noise_floor_32f.h
deleted file mode 100644
index 9b9f5d3..0000000
--- a/volk/kernels/volk/volk_32f_s32f_calc_spectral_noise_floor_32f.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
-#define INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Calculates the spectral noise floor of an input power spectrum
-
-  Calculates the spectral noise floor of an input power spectrum by 
determining the mean of the input power spectrum, then recalculating the mean 
excluding any power spectrum values that exceed the mean by the 
spectralExclusionValue (in dB).  Provides a rough estimation of the signal 
noise floor.
-
-  \param realDataPoints The input power spectrum
-  \param num_points The number of data points in the input power spectrum 
vector
-  \param spectralExclusionValue The number of dB above the noise floor that a 
data point must be to be excluded from the noise floor calculation - default 
value is 20
-  \param noiseFloorAmplitude The noise floor of the input spectrum, in dB
-*/
-static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_a_sse(float* 
noiseFloorAmplitude, const float* realDataPoints, const float 
spectralExclusionValue, const unsigned int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* dataPointsPtr = realDataPoints;
-  __VOLK_ATTR_ALIGNED(16) float avgPointsVector[4];
-
-  __m128 dataPointsVal;
-  __m128 avgPointsVal = _mm_setzero_ps();
-  // Calculate the sum (for mean) for all points
-  for(; number < quarterPoints; number++){
-
-    dataPointsVal = _mm_load_ps(dataPointsPtr);
-
-    dataPointsPtr += 4;
-
-    avgPointsVal = _mm_add_ps(avgPointsVal, dataPointsVal);
-  }
-
-  _mm_store_ps(avgPointsVector, avgPointsVal);
-
-  float sumMean = 0.0;
-  sumMean += avgPointsVector[0];
-  sumMean += avgPointsVector[1];
-  sumMean += avgPointsVector[2];
-  sumMean += avgPointsVector[3];
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++){
-    sumMean += realDataPoints[number];
-  }
-
-  // calculate the spectral mean
-  // +20 because for the comparison below we only want to throw out bins
-  // that are significantly higher (and would, thus, affect the mean more
-  const float meanAmplitude = (sumMean / ((float)num_points)) + 
spectralExclusionValue;
-
-  dataPointsPtr = realDataPoints; // Reset the dataPointsPtr
-  __m128 vMeanAmplitudeVector = _mm_set_ps1(meanAmplitude);
-  __m128 vOnesVector = _mm_set_ps1(1.0);
-  __m128 vValidBinCount = _mm_setzero_ps();
-  avgPointsVal = _mm_setzero_ps();
-  __m128 compareMask;
-  number = 0;
-  // Calculate the sum (for mean) for any points which do NOT exceed the mean 
amplitude
-  for(; number < quarterPoints; number++){
-
-    dataPointsVal = _mm_load_ps(dataPointsPtr);
-
-    dataPointsPtr += 4;
-
-    // Identify which items do not exceed the mean amplitude
-    compareMask = _mm_cmple_ps(dataPointsVal, vMeanAmplitudeVector);
-
-    // Mask off the items that exceed the mean amplitude and add the avg 
Points that do not exceed the mean amplitude
-    avgPointsVal = _mm_add_ps(avgPointsVal, _mm_and_ps(compareMask, 
dataPointsVal));
-
-    // Count the number of bins which do not exceed the mean amplitude
-    vValidBinCount = _mm_add_ps(vValidBinCount, _mm_and_ps(compareMask, 
vOnesVector));
-  }
-
-  // Calculate the mean from the remaining data points
-  _mm_store_ps(avgPointsVector, avgPointsVal);
-
-  sumMean = 0.0;
-  sumMean += avgPointsVector[0];
-  sumMean += avgPointsVector[1];
-  sumMean += avgPointsVector[2];
-  sumMean += avgPointsVector[3];
-
-  // Calculate the number of valid bins from the remaning count
-  __VOLK_ATTR_ALIGNED(16) float validBinCountVector[4];
-  _mm_store_ps(validBinCountVector, vValidBinCount);
-
-  float validBinCount = 0;
-  validBinCount += validBinCountVector[0];
-  validBinCount += validBinCountVector[1];
-  validBinCount += validBinCountVector[2];
-  validBinCount += validBinCountVector[3];
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++){
-    if(realDataPoints[number] <= meanAmplitude){
-      sumMean += realDataPoints[number];
-      validBinCount += 1.0;
-    }
-  }
-
-  float localNoiseFloorAmplitude = 0;
-  if(validBinCount > 0.0){
-    localNoiseFloorAmplitude = sumMean / validBinCount;
-  }
-  else{
-    localNoiseFloorAmplitude = meanAmplitude; // For the odd case that all the 
amplitudes are equal...
-  }
-
-  *noiseFloorAmplitude = localNoiseFloorAmplitude;
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Calculates the spectral noise floor of an input power spectrum
-
-  Calculates the spectral noise floor of an input power spectrum by 
determining the mean of the input power spectrum, then recalculating the mean 
excluding any power spectrum values that exceed the mean by the 
spectralExclusionValue (in dB).  Provides a rough estimation of the signal 
noise floor.
-
-  \param realDataPoints The input power spectrum
-  \param num_points The number of data points in the input power spectrum 
vector
-  \param spectralExclusionValue The number of dB above the noise floor that a 
data point must be to be excluded from the noise floor calculation - default 
value is 20
-  \param noiseFloorAmplitude The noise floor of the input spectrum, in dB
-*/
-static inline void volk_32f_s32f_calc_spectral_noise_floor_32f_generic(float* 
noiseFloorAmplitude, const float* realDataPoints, const float 
spectralExclusionValue, const unsigned int num_points){
-  float sumMean = 0.0;
-  unsigned int number;
-  // find the sum (for mean), etc
-  for(number = 0; number < num_points; number++){
-    // sum (for mean)
-    sumMean += realDataPoints[number];
-  }
-
-  // calculate the spectral mean
-  // +20 because for the comparison below we only want to throw out bins
-  // that are significantly higher (and would, thus, affect the mean more)
-  const float meanAmplitude = (sumMean / num_points) + spectralExclusionValue;
-
-  // now throw out any bins higher than the mean
-  sumMean = 0.0;
-  unsigned int newNumDataPoints = num_points;
-  for(number = 0; number < num_points; number++){
-    if (realDataPoints[number] <= meanAmplitude)
-      sumMean += realDataPoints[number];
-    else
-      newNumDataPoints--;
-  }
-
-  float localNoiseFloorAmplitude = 0.0;
-  if (newNumDataPoints == 0)             // in the odd case that all
-    localNoiseFloorAmplitude = meanAmplitude; // amplitudes are equal!
-  else
-    localNoiseFloorAmplitude = sumMean / ((float)newNumDataPoints);
-
-  *noiseFloorAmplitude = localNoiseFloorAmplitude;
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_calc_spectral_noise_floor_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_convert_16i.h 
b/volk/kernels/volk/volk_32f_s32f_convert_16i.h
deleted file mode 100644
index c11c13b..0000000
--- a/volk/kernels/volk/volk_32f_s32f_convert_16i.h
+++ /dev/null
@@ -1,438 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H
-#define INCLUDED_volk_32f_s32f_convert_16i_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 16 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_16i_u_avx(int16_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int eighthPoints = num_points / 8;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int16_t* outputVectorPtr = outputVector;
-
-  float min_val = -32768;
-  float max_val = 32767;
-  float r;
-
-  __m256 vScalar = _mm256_set1_ps(scalar);
-  __m256 inputVal, ret;
-  __m256i intInputVal;
-  __m128i intInputVal1, intInputVal2;
-  __m256 vmin_val = _mm256_set1_ps(min_val);
-  __m256 vmax_val = _mm256_set1_ps(max_val);
-
-  for(;number < eighthPoints; number++){
-    inputVal = _mm256_loadu_ps(inputVectorPtr); inputVectorPtr += 8;
-
-    // Scale and clip
-    ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), 
vmax_val), vmin_val);
-
-    intInputVal = _mm256_cvtps_epi32(ret);
-
-    intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
-    intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
-
-    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-
-    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int16_t)rintf(r);
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 16 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int eighthPoints = num_points / 8;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int16_t* outputVectorPtr = outputVector;
-
-  float min_val = -32768;
-  float max_val = 32767;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 inputVal1, inputVal2;
-  __m128i intInputVal1, intInputVal2;
-  __m128 ret1, ret2;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  for(;number < eighthPoints; number++){
-    inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-    inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    // Scale and clip
-    ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), 
vmin_val);
-    ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), 
vmin_val);
-
-    intInputVal1 = _mm_cvtps_epi32(ret1);
-    intInputVal2 = _mm_cvtps_epi32(ret2);
-
-    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-
-    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int16_t)rintf(r);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 16 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int16_t* outputVectorPtr = outputVector;
-
-  float min_val = -32768;
-  float max_val = 32767;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 ret;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    ret = _mm_loadu_ps(inputVectorPtr);
-    inputVectorPtr += 4;
-
-    // Scale and clip
-    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
-
-    _mm_store_ps(outputFloatBuffer, ret);
-    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
-    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
-    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
-    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int16_t)rintf(r);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 16 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_16i_generic(int16_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  int16_t* outputVectorPtr = outputVector;
-  const float* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  float min_val = -32768;
-  float max_val = 32767;
-  float r;
-
-  for(number = 0; number < num_points; number++){
-    r = *inputVectorPtr++  * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    *outputVectorPtr++ = (int16_t)rintf(r);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */
-#ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H
-#define INCLUDED_volk_32f_s32f_convert_16i_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 16 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_16i_a_avx(int16_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int eighthPoints = num_points / 8;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int16_t* outputVectorPtr = outputVector;
-
-  float min_val = -32768;
-  float max_val = 32767;
-  float r;
-
-  __m256 vScalar = _mm256_set1_ps(scalar);
-  __m256 inputVal, ret;
-  __m256i intInputVal;
-  __m128i intInputVal1, intInputVal2;
-  __m256 vmin_val = _mm256_set1_ps(min_val);
-  __m256 vmax_val = _mm256_set1_ps(max_val);
-
-  for(;number < eighthPoints; number++){
-    inputVal = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
-
-    // Scale and clip
-    ret = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal, vScalar), 
vmax_val), vmin_val);
-
-    intInputVal = _mm256_cvtps_epi32(ret);
-
-    intInputVal1 = _mm256_extractf128_si256(intInputVal, 0);
-    intInputVal2 = _mm256_extractf128_si256(intInputVal, 1);
-
-    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-
-    _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int16_t)rintf(r);
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 16 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int eighthPoints = num_points / 8;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int16_t* outputVectorPtr = outputVector;
-
-  float min_val = -32768;
-  float max_val = 32767;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 inputVal1, inputVal2;
-  __m128i intInputVal1, intInputVal2;
-  __m128 ret1, ret2;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  for(;number < eighthPoints; number++){
-    inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-    inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    // Scale and clip
-    ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), 
vmin_val);
-    ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), 
vmin_val);
-
-    intInputVal1 = _mm_cvtps_epi32(ret1);
-    intInputVal2 = _mm_cvtps_epi32(ret2);
-
-    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-
-    _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int16_t)rintf(r);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 16 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int16_t* outputVectorPtr = outputVector;
-
-  float min_val = -32768;
-  float max_val = 32767;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 ret;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    ret = _mm_load_ps(inputVectorPtr);
-    inputVectorPtr += 4;
-
-    // Scale and clip
-    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
-
-    _mm_store_ps(outputFloatBuffer, ret);
-    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]);
-    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]);
-    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]);
-    *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int16_t)rintf(r);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 16 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  int16_t* outputVectorPtr = outputVector;
-  const float* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  float min_val = -32768;
-  float max_val = 32767;
-  float r;
-
-  for(number = 0; number < num_points; number++){
-    r  = *inputVectorPtr++ * scalar;
-    if(r < min_val)
-      r = min_val;
-    else if(r > max_val)
-      r = max_val;
-    *outputVectorPtr++ = (int16_t)rintf(r);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_convert_32i.h 
b/volk/kernels/volk/volk_32f_s32f_convert_32i.h
deleted file mode 100644
index f8e8045..0000000
--- a/volk/kernels/volk/volk_32f_s32f_convert_32i.h
+++ /dev/null
@@ -1,353 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
-#define INCLUDED_volk_32f_s32f_convert_32i_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 32 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 32 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int32_t* outputVectorPtr = outputVector;
-
-  float min_val = -2147483647;
-  float max_val = 2147483647;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 inputVal1;
-  __m128i intInputVal1;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  for(;number < quarterPoints; number++){
-    inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), 
vmax_val), vmin_val);
-    intInputVal1 = _mm_cvtps_epi32(inputVal1);
-
-    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int32_t)(r);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 32 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 32 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int32_t* outputVectorPtr = outputVector;
-
-  float min_val = -2147483647;
-  float max_val = 2147483647;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 ret;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    ret = _mm_loadu_ps(inputVectorPtr);
-    inputVectorPtr += 4;
-
-    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
-
-    _mm_store_ps(outputFloatBuffer, ret);
-    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
-    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]);
-    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]);
-    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int32_t)(r);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 32 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 32 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  int32_t* outputVectorPtr = outputVector;
-  const float* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  float min_val = -2147483647;
-  float max_val = 2147483647;
-  float r;
-
-  for(number = 0; number < num_points; number++){
-    r = *inputVectorPtr++ * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    *outputVectorPtr++ = (int32_t)(r);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */
-#ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
-#define INCLUDED_volk_32f_s32f_convert_32i_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 32 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 32 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int eighthPoints = num_points / 8;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int32_t* outputVectorPtr = outputVector;
-
-  float min_val = -2147483647;
-  float max_val = 2147483647;
-  float r;
-
-  __m256 vScalar = _mm256_set1_ps(scalar);
-  __m256 inputVal1;
-  __m256i intInputVal1;
-  __m256 vmin_val = _mm256_set1_ps(min_val);
-  __m256 vmax_val = _mm256_set1_ps(max_val);
-
-  for(;number < eighthPoints; number++){
-    inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8;
-
-    inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), 
vmax_val), vmin_val);
-    intInputVal1 = _mm256_cvtps_epi32(inputVal1);
-
-    _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int32_t)(r);
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 32 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 32 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int32_t* outputVectorPtr = outputVector;
-
-  float min_val = -2147483647;
-  float max_val = 2147483647;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 inputVal1;
-  __m128i intInputVal1;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  for(;number < quarterPoints; number++){
-    inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), 
vmax_val), vmin_val);
-    intInputVal1 = _mm_cvtps_epi32(inputVal1);
-
-    _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int32_t)(r);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 32 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 32 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int32_t* outputVectorPtr = outputVector;
-
-  float min_val = -2147483647;
-  float max_val = 2147483647;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 ret;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    ret = _mm_load_ps(inputVectorPtr);
-    inputVectorPtr += 4;
-
-    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
-
-    _mm_store_ps(outputFloatBuffer, ret);
-    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]);
-    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]);
-    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]);
-    *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int32_t)(r);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 32 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 32 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  int32_t* outputVectorPtr = outputVector;
-  const float* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  float min_val = -2147483647;
-  float max_val = 2147483647;
-  float r;
-
-  for(number = 0; number < num_points; number++){
-    r = *inputVectorPtr++ * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    *outputVectorPtr++ = (int32_t)(r);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_convert_8i.h 
b/volk/kernels/volk/volk_32f_s32f_convert_8i.h
deleted file mode 100644
index 2f0cee9..0000000
--- a/volk/kernels/volk/volk_32f_s32f_convert_8i.h
+++ /dev/null
@@ -1,334 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
-#define INCLUDED_volk_32f_s32f_convert_8i_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 8 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 8 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector, const 
float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int8_t* outputVectorPtr = outputVector;
-
-  float min_val = -128;
-  float max_val = 127;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 inputVal1, inputVal2, inputVal3, inputVal4;
-  __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  for(;number < sixteenthPoints; number++){
-    inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-    inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-    inputVal3 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-    inputVal4 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), 
vmax_val), vmin_val);
-    inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), 
vmax_val), vmin_val);
-    inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), 
vmax_val), vmin_val);
-    inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), 
vmax_val), vmin_val);
-
-    intInputVal1 = _mm_cvtps_epi32(inputVal1);
-    intInputVal2 = _mm_cvtps_epi32(inputVal2);
-    intInputVal3 = _mm_cvtps_epi32(inputVal3);
-    intInputVal4 = _mm_cvtps_epi32(inputVal4);
-
-    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-    intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
-
-    intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);
-
-    _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 16;
-  }
-
-  number = sixteenthPoints * 16;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int16_t)(r);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 8 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 8 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector, const 
float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int8_t* outputVectorPtr = outputVector;
-
-  float min_val = -128;
-  float max_val = 127;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 ret;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    ret = _mm_loadu_ps(inputVectorPtr);
-    inputVectorPtr += 4;
-
-    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
-
-    _mm_store_ps(outputFloatBuffer, ret);
-    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
-    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
-    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
-    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int16_t)(r);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 8 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 8 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-    \note Input buffer does NOT need to be properly aligned
-  */
-static inline void volk_32f_s32f_convert_8i_generic(int8_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  int8_t* outputVectorPtr = outputVector;
-  const float* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  float min_val = -128;
-  float max_val = 127;
-  float r;
-
-  for(number = 0; number < num_points; number++){
-    r = *inputVectorPtr++ * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    *outputVectorPtr++ = (int16_t)(r);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */
-#ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
-#define INCLUDED_volk_32f_s32f_convert_8i_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 8 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 8 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector, const 
float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-  int8_t* outputVectorPtr = outputVector;
-
-  float min_val = -128;
-  float max_val = 127;
-  float r;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 inputVal1, inputVal2, inputVal3, inputVal4;
-  __m128i intInputVal1, intInputVal2, intInputVal3, intInputVal4;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  for(;number < sixteenthPoints; number++){
-    inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-    inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-    inputVal3 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-    inputVal4 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
-
-    inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), 
vmax_val), vmin_val);
-    inputVal2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), 
vmax_val), vmin_val);
-    inputVal3 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal3, vScalar), 
vmax_val), vmin_val);
-    inputVal4 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal4, vScalar), 
vmax_val), vmin_val);
-
-    intInputVal1 = _mm_cvtps_epi32(inputVal1);
-    intInputVal2 = _mm_cvtps_epi32(inputVal2);
-    intInputVal3 = _mm_cvtps_epi32(inputVal3);
-    intInputVal4 = _mm_cvtps_epi32(inputVal4);
-
-    intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
-    intInputVal3 = _mm_packs_epi32(intInputVal3, intInputVal4);
-
-    intInputVal1 = _mm_packs_epi16(intInputVal1, intInputVal3);
-
-    _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
-    outputVectorPtr += 16;
-  }
-
-  number = sixteenthPoints * 16;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int8_t)(r);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 8 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 8 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector, const 
float* inputVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* inputVectorPtr = (const float*)inputVector;
-
-  float min_val = -128;
-  float max_val = 127;
-  float r;
-
-  int8_t* outputVectorPtr = outputVector;
-  __m128 vScalar = _mm_set_ps1(scalar);
-  __m128 ret;
-  __m128 vmin_val = _mm_set_ps1(min_val);
-  __m128 vmax_val = _mm_set_ps1(max_val);
-
-  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    ret = _mm_load_ps(inputVectorPtr);
-    inputVectorPtr += 4;
-
-    ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
-
-    _mm_store_ps(outputFloatBuffer, ret);
-    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[0]);
-    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[1]);
-    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[2]);
-    *outputVectorPtr++ = (int8_t)(outputFloatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    r = inputVector[number] * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    outputVector[number] = (int8_t)(r);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies each point in the input buffer by the scalar value, then 
converts the result into a 8 bit integer value
-    \param inputVector The floating point input data buffer
-    \param outputVector The 8 bit output data buffer
-    \param scalar The value multiplied against each point in the input buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32f_s32f_convert_8i_a_generic(int8_t* outputVector, 
const float* inputVector, const float scalar, unsigned int num_points){
-  int8_t* outputVectorPtr = outputVector;
-  const float* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  float min_val = -128;
-  float max_val = 127;
-  float r;
-
-  for(number = 0; number < num_points; number++){
-    r = *inputVectorPtr++ * scalar;
-    if(r > max_val)
-      r = max_val;
-    else if(r < min_val)
-      r = min_val;
-    *outputVectorPtr++ = (int8_t)(r);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_multiply_32f.h 
b/volk/kernels/volk/volk_32f_s32f_multiply_32f.h
deleted file mode 100644
index ee9e2b5..0000000
--- a/volk/kernels/volk/volk_32f_s32f_multiply_32f.h
+++ /dev/null
@@ -1,270 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
-#define INCLUDED_volk_32f_s32f_multiply_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Scalar float multiply
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param scalar the scalar value
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const 
float* aVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-
-    __m128 aVal, bVal, cVal;
-    bVal = _mm_set_ps1(scalar);
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_loadu_ps(aPtr);
-
-      cVal = _mm_mul_ps(aVal, bVal);
-
-      _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * scalar;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Scalar float multiply
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param scalar the scalar value
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const 
float* aVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-
-    __m256 aVal, bVal, cVal;
-    bVal = _mm256_set1_ps(scalar);
-    for(;number < eighthPoints; number++){
-
-      aVal = _mm256_loadu_ps(aPtr);
-
-      cVal = _mm256_mul_ps(aVal, bVal);
-
-      _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C 
container
-
-      aPtr += 8;
-      cPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * scalar;
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Scalar float multiply
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param scalar the scalar value
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_s32f_multiply_32f_generic(float* cVector, const 
float* aVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-  const float* inputPtr = aVector;
-  float* outputPtr = cVector;
-  for(number = 0; number < num_points; number++){
-    *outputPtr = (*inputPtr) * scalar;
-    inputPtr++;
-    outputPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
-#ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
-#define INCLUDED_volk_32f_s32f_multiply_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Scalar float multiply
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param scalar the scalar value
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const 
float* aVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-
-    __m128 aVal, bVal, cVal;
-    bVal = _mm_set_ps1(scalar);
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-
-      cVal = _mm_mul_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * scalar;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Scalar float multiply
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param scalar the scalar value
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const 
float* aVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-
-    __m256 aVal, bVal, cVal;
-    bVal = _mm256_set1_ps(scalar);
-    for(;number < eighthPoints; number++){
-
-      aVal = _mm256_load_ps(aPtr);
-
-      cVal = _mm256_mul_ps(aVal, bVal);
-
-      _mm256_store_ps(cPtr,cVal); // Store the results back into the C 
container
-
-      aPtr += 8;
-      cPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * scalar;
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Scalar float multiply
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param scalar the scalar value
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_s32f_multiply_32f_u_neon(float* cVector, const 
float* aVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-  const float* inputPtr = aVector;
-  float* outputPtr = cVector;
-  const unsigned int quarterPoints = num_points / 4;
-
-  float32x4_t aVal, cVal;
-
-  for(number = 0; number < quarterPoints; number++){
-    aVal = vld1q_f32(inputPtr); // Load into NEON regs
-    cVal = vmulq_n_f32 (aVal, scalar); // Do the multiply
-    vst1q_f32(outputPtr, cVal); // Store results back to output
-    inputPtr += 4;
-    outputPtr += 4;
-  }
-  for(number = quarterPoints * 4; number < num_points; number++){
-      *outputPtr++ = (*inputPtr++) * scalar;
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Scalar float multiply
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param scalar the scalar value
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, const 
float* aVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-  const float* inputPtr = aVector;
-  float* outputPtr = cVector;
-  for(number = 0; number < num_points; number++){
-    *outputPtr = (*inputPtr) * scalar;
-    inputPtr++;
-    outputPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Scalar float multiply
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param scalar the scalar value
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst, const float* 
src, const float scalar, unsigned int num_points);
-static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector, const 
float* aVector, const float scalar, unsigned int num_points){
-    volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, 
num_points);
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_normalize.h 
b/volk/kernels/volk/volk_32f_s32f_normalize.h
deleted file mode 100644
index 95a25f5..0000000
--- a/volk/kernels/volk/volk_32f_s32f_normalize.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_normalize_a_H
-#define INCLUDED_volk_32f_s32f_normalize_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Normalizes all points in the buffer by the scalar value ( divides 
each data point by the scalar value )
-  \param vecBuffer The buffer of values to be vectorized
-  \param num_points The number of values in vecBuffer
-  \param scalar The scale value to be applied to each buffer value
-*/
-static inline void volk_32f_s32f_normalize_a_sse(float* vecBuffer, const float 
scalar, unsigned int num_points){
-  unsigned int number = 0;
-  float* inputPtr = vecBuffer;
-
-  const float invScalar = 1.0 / scalar;
-  __m128 vecScalar = _mm_set_ps1(invScalar);
-
-  __m128 input1;
-
-  const uint64_t quarterPoints = num_points / 4;
-  for(;number < quarterPoints; number++){
-
-    input1 = _mm_load_ps(inputPtr);
-
-    input1 = _mm_mul_ps(input1, vecScalar);
-
-    _mm_store_ps(inputPtr, input1);
-
-    inputPtr += 4;
-  }
-
-  number = quarterPoints*4;
-  for(; number < num_points; number++){
-    *inputPtr *= invScalar;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Normalizes the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be normalizeed
-  \param bVector One of the vectors to be normalizeed
-  \param num_points The number of values in aVector and bVector to be 
normalizeed together and stored into cVector
-*/
-static inline void volk_32f_s32f_normalize_generic(float* vecBuffer, const 
float scalar, unsigned int num_points){
-  unsigned int number = 0;
-  float* inputPtr = vecBuffer;
-  const float invScalar = 1.0 / scalar;
-  for(number = 0; number < num_points; number++){
-    *inputPtr *= invScalar;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Normalizes the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be normalizeed
-  \param bVector One of the vectors to be normalizeed
-  \param num_points The number of values in aVector and bVector to be 
normalizeed together and stored into cVector
-*/
-extern void volk_32f_s32f_normalize_a_orc_impl(float* dst, float* src, const 
float scalar, unsigned int num_points);
-static inline void volk_32f_s32f_normalize_u_orc(float* vecBuffer, const float 
scalar, unsigned int num_points){
-    float invscalar = 1.0 / scalar;
-    volk_32f_s32f_normalize_a_orc_impl(vecBuffer, vecBuffer, invscalar, 
num_points);
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_normalize_a_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_power_32f.h 
b/volk/kernels/volk/volk_32f_s32f_power_32f.h
deleted file mode 100644
index b8b4413..0000000
--- a/volk/kernels/volk/volk_32f_s32f_power_32f.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_power_32f_a_H
-#define INCLUDED_volk_32f_s32f_power_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <tmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-/*!
-  \brief Takes each the input vector value to the specified power and stores 
the results in the return vector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector of values to be taken to a power
-  \param power The power value to be applied to each data point
-  \param num_points The number of values in aVector to be taken to the 
specified power level and stored into cVector
-*/
-static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, const 
float* aVector, const float power, unsigned int num_points){
-  unsigned int number = 0;
-
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 vPower = _mm_set_ps1(power);
-  __m128 zeroValue = _mm_setzero_ps();
-  __m128 signMask;
-  __m128 negatedValues;
-  __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
-  __m128 onesMask = _mm_set_ps1(1);
-
-  __m128 aVal, cVal;
-  for(;number < quarterPoints; number++){
-
-    aVal = _mm_load_ps(aPtr);
-    signMask = _mm_cmplt_ps(aVal, zeroValue);
-    negatedValues = _mm_sub_ps(zeroValue, aVal);
-    aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
-
-    // powf4 doesn't support negative values in the base, so we mask them off 
and then apply the negative after
-    cVal = powf4(aVal, vPower); // Takes each input value to the specified 
power
-
-    cVal = _mm_mul_ps( _mm_blendv_ps(onesMask, negativeOneToPower, signMask), 
cVal);
-
-    _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-    aPtr += 4;
-    cPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-  for(;number < num_points; number++){
-    *cPtr++ = powf((*aPtr++), power);
-  }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-/*!
-  \brief Takes each the input vector value to the specified power and stores 
the results in the return vector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector of values to be taken to a power
-  \param power The power value to be applied to each data point
-  \param num_points The number of values in aVector to be taken to the 
specified power level and stored into cVector
-*/
-static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, const float* 
aVector, const float power, unsigned int num_points){
-  unsigned int number = 0;
-
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 vPower = _mm_set_ps1(power);
-  __m128 zeroValue = _mm_setzero_ps();
-  __m128 signMask;
-  __m128 negatedValues;
-  __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
-  __m128 onesMask = _mm_set_ps1(1);
-
-  __m128 aVal, cVal;
-  for(;number < quarterPoints; number++){
-
-    aVal = _mm_load_ps(aPtr);
-    signMask = _mm_cmplt_ps(aVal, zeroValue);
-    negatedValues = _mm_sub_ps(zeroValue, aVal);
-    aVal = _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, 
negatedValues) );
-
-    // powf4 doesn't support negative values in the base, so we mask them off 
and then apply the negative after
-    cVal = powf4(aVal, vPower); // Takes each input value to the specified 
power
-
-    cVal = _mm_mul_ps( _mm_or_ps( _mm_andnot_ps(signMask, onesMask), 
_mm_and_ps(signMask, negativeOneToPower) ), cVal);
-
-    _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-    aPtr += 4;
-    cPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-  for(;number < num_points; number++){
-    *cPtr++ = powf((*aPtr++), power);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Takes each the input vector value to the specified power and stores 
the results in the return vector
-    \param cVector The vector where the results will be stored
-    \param aVector The vector of values to be taken to a power
-    \param power The power value to be applied to each data point
-    \param num_points The number of values in aVector to be taken to the 
specified power level and stored into cVector
-  */
-static inline void volk_32f_s32f_power_32f_generic(float* cVector, const 
float* aVector, const float power, unsigned int num_points){
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *cPtr++ = powf((*aPtr++), power);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_power_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_s32f_stddev_32f.h 
b/volk/kernels/volk/volk_32f_s32f_stddev_32f.h
deleted file mode 100644
index 590a7d2..0000000
--- a/volk/kernels/volk/volk_32f_s32f_stddev_32f.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_s32f_stddev_32f_a_H
-#define INCLUDED_volk_32f_s32f_stddev_32f_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Calculates the standard deviation of the input buffer using the 
supplied mean
-  \param stddev The calculated standard deviation
-  \param inputBuffer The buffer of points to calculate the std deviation for
-  \param mean The mean of the input buffer
-  \param num_points The number of values in input buffer to used in the stddev 
calculation
-*/
-static inline void volk_32f_s32f_stddev_32f_a_sse4_1(float* stddev, const 
float* inputBuffer, const float mean, unsigned int num_points){
-  float returnValue = 0;
-  if(num_points > 0){
-    unsigned int number = 0;
-    const unsigned int sixteenthPoints = num_points / 16;
-
-    const float* aPtr = inputBuffer;
-
-    __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
-
-    __m128 squareAccumulator = _mm_setzero_ps();
-    __m128 aVal1, aVal2, aVal3, aVal4;
-    __m128 cVal1, cVal2, cVal3, cVal4;
-    for(;number < sixteenthPoints; number++) {
-      aVal1 = _mm_load_ps(aPtr); aPtr += 4;
-      cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
-
-      aVal2 = _mm_load_ps(aPtr); aPtr += 4;
-      cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
-
-      aVal3 = _mm_load_ps(aPtr); aPtr += 4;
-      cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
-
-      aVal4 = _mm_load_ps(aPtr); aPtr += 4;
-      cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
-
-      cVal1 = _mm_or_ps(cVal1, cVal2);
-      cVal3 = _mm_or_ps(cVal3, cVal4);
-      cVal1 = _mm_or_ps(cVal1, cVal3);
-
-      squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // 
squareAccumulator += x^2
-    }
-    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back 
into the C container
-    returnValue = squareBuffer[0];
-    returnValue += squareBuffer[1];
-    returnValue += squareBuffer[2];
-    returnValue += squareBuffer[3];
-
-    number = sixteenthPoints * 16;
-    for(;number < num_points; number++){
-      returnValue += (*aPtr) * (*aPtr);
-      aPtr++;
-    }
-    returnValue /= num_points;
-    returnValue -= (mean * mean);
-    returnValue = sqrtf(returnValue);
-  }
-  *stddev = returnValue;
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Calculates the standard deviation of the input buffer using the 
supplied mean
-  \param stddev The calculated standard deviation
-  \param inputBuffer The buffer of points to calculate the std deviation for
-  \param mean The mean of the input buffer
-  \param num_points The number of values in input buffer to used in the stddev 
calculation
-*/
-static inline void volk_32f_s32f_stddev_32f_a_sse(float* stddev, const float* 
inputBuffer, const float mean, unsigned int num_points){
-  float returnValue = 0;
-  if(num_points > 0){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* aPtr = inputBuffer;
-
-    __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
-
-    __m128 squareAccumulator = _mm_setzero_ps();
-    __m128 aVal = _mm_setzero_ps();
-    for(;number < quarterPoints; number++) {
-      aVal = _mm_load_ps(aPtr);                     // aVal = x
-      aVal = _mm_mul_ps(aVal, aVal);                // squareAccumulator += x^2
-      squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
-      aPtr += 4;
-    }
-    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back 
into the C container
-    returnValue = squareBuffer[0];
-    returnValue += squareBuffer[1];
-    returnValue += squareBuffer[2];
-    returnValue += squareBuffer[3];
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      returnValue += (*aPtr) * (*aPtr);
-      aPtr++;
-    }
-    returnValue /= num_points;
-    returnValue -= (mean * mean);
-    returnValue = sqrtf(returnValue);
-  }
-  *stddev = returnValue;
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Calculates the standard deviation of the input buffer using the 
supplied mean
-  \param stddev The calculated standard deviation
-  \param inputBuffer The buffer of points to calculate the std deviation for
-  \param mean The mean of the input buffer
-  \param num_points The number of values in input buffer to used in the stddev 
calculation
-*/
-static inline void volk_32f_s32f_stddev_32f_generic(float* stddev, const 
float* inputBuffer, const float mean, unsigned int num_points){
-  float returnValue = 0;
-  if(num_points > 0){
-    const float* aPtr = inputBuffer;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      returnValue += (*aPtr) * (*aPtr);
-      aPtr++;
-    }
-
-    returnValue /= num_points;
-    returnValue -= (mean * mean);
-    returnValue = sqrtf(returnValue);
-  }
-  *stddev = returnValue;
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_s32f_stddev_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_sin_32f.h 
b/volk/kernels/volk/volk_32f_sin_32f.h
deleted file mode 100644
index 48ce60c..0000000
--- a/volk/kernels/volk/volk_32f_sin_32f.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-
-#ifndef INCLUDED_volk_32f_sin_32f_a_H
-#define INCLUDED_volk_32f_sin_32f_a_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes sine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which sine is to be computed
-*/
-static inline void volk_32f_sin_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    unsigned int i = 0;
-
-    __m128 aVal, s, m4pi, pio4A, pio4B, cp1, cp2, cp3, cp4, cp5, ffours, 
ftwos, fones, fzeroes;
-    __m128 sine, cosine, condition1, condition2;
-    __m128i q, r, ones, twos, fours;
-
-    m4pi = _mm_set1_ps(1.273239545);
-    pio4A = _mm_set1_ps(0.78515625);
-    pio4B = _mm_set1_ps(0.241876e-3);
-    ffours = _mm_set1_ps(4.0);
-    ftwos = _mm_set1_ps(2.0);
-    fones = _mm_set1_ps(1.0);
-    fzeroes = _mm_setzero_ps();
-    ones = _mm_set1_epi32(1);
-    twos = _mm_set1_epi32(2);
-    fours = _mm_set1_epi32(4);
-
-    cp1 = _mm_set1_ps(1.0);
-    cp2 = _mm_set1_ps(0.83333333e-1);
-    cp3 = _mm_set1_ps(0.2777778e-2);
-    cp4 = _mm_set1_ps(0.49603e-4);
-    cp5 = _mm_set1_ps(0.551e-6);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_load_ps(aPtr);
-        s = _mm_sub_ps(aVal, _mm_and_ps(_mm_mul_ps(aVal, ftwos), 
_mm_cmplt_ps(aVal, fzeroes)));
-        q = _mm_cvtps_epi32(_mm_mul_ps(s, m4pi));
-        r = _mm_add_epi32(q, _mm_and_si128(q, ones));
-
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4A));
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4B));
-
-        s = _mm_div_ps(s, _mm_set1_ps(8.0));    // The constant is 2^N, for 3 
times argument reduction
-        s = _mm_mul_ps(s, s);
-        // Evaluate Taylor series
-        s = 
_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(s,
 cp5), cp4), s), cp3), s), cp2), s), cp1), s);
-
-        for(i = 0; i < 3; i++) {
-            s = _mm_mul_ps(s, _mm_sub_ps(ffours, s));
-        }
-        s = _mm_div_ps(s, ftwos);
-
-        sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s));
-        cosine = _mm_sub_ps(fones, s);
-
-        condition1 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, ones), twos)), 
fzeroes);
-        condition2 = 
_mm_cmpneq_ps(_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(q, fours)), fzeroes), 
_mm_cmplt_ps(aVal, fzeroes));
-        // Need this condition only for cos
-        //condition3 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, twos), fours)), 
fzeroes);
-
-        sine = _mm_add_ps(sine, _mm_and_ps(_mm_sub_ps(cosine, sine), 
condition1));
-        sine = _mm_sub_ps(sine, _mm_and_ps(_mm_mul_ps(sine, 
_mm_set1_ps(2.0f)), condition2));
-        _mm_store_ps(bPtr, sine);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = sin(*aPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#endif /* INCLUDED_volk_32f_sin_32f_a_H */
-
-#ifndef INCLUDED_volk_32f_sin_32f_u_H
-#define INCLUDED_volk_32f_sin_32f_u_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes sine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which sine is to be computed
-*/
-static inline void volk_32f_sin_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    unsigned int i = 0;
-
-    __m128 aVal, s, m4pi, pio4A, pio4B, cp1, cp2, cp3, cp4, cp5, ffours, 
ftwos, fones, fzeroes;
-    __m128 sine, cosine, condition1, condition2;
-    __m128i q, r, ones, twos, fours;
-
-    m4pi = _mm_set1_ps(1.273239545);
-    pio4A = _mm_set1_ps(0.78515625);
-    pio4B = _mm_set1_ps(0.241876e-3);
-    ffours = _mm_set1_ps(4.0);
-    ftwos = _mm_set1_ps(2.0);
-    fones = _mm_set1_ps(1.0);
-    fzeroes = _mm_setzero_ps();
-    ones = _mm_set1_epi32(1);
-    twos = _mm_set1_epi32(2);
-    fours = _mm_set1_epi32(4);
-
-    cp1 = _mm_set1_ps(1.0);
-    cp2 = _mm_set1_ps(0.83333333e-1);
-    cp3 = _mm_set1_ps(0.2777778e-2);
-    cp4 = _mm_set1_ps(0.49603e-4);
-    cp5 = _mm_set1_ps(0.551e-6);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_loadu_ps(aPtr);
-        s = _mm_sub_ps(aVal, _mm_and_ps(_mm_mul_ps(aVal, ftwos), 
_mm_cmplt_ps(aVal, fzeroes)));
-        q = _mm_cvtps_epi32(_mm_mul_ps(s, m4pi));
-        r = _mm_add_epi32(q, _mm_and_si128(q, ones));
-
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4A));
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4B));
-
-        s = _mm_div_ps(s, _mm_set1_ps(8.0));    // The constant is 2^N, for 3 
times argument reduction
-        s = _mm_mul_ps(s, s);
-        // Evaluate Taylor series
-        s = 
_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(s,
 cp5), cp4), s), cp3), s), cp2), s), cp1), s);
-
-        for(i = 0; i < 3; i++) {
-            s = _mm_mul_ps(s, _mm_sub_ps(ffours, s));
-        }
-        s = _mm_div_ps(s, ftwos);
-
-        sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s));
-        cosine = _mm_sub_ps(fones, s);
-
-        condition1 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, ones), twos)), 
fzeroes);
-        condition2 = 
_mm_cmpneq_ps(_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(q, fours)), fzeroes), 
_mm_cmplt_ps(aVal, fzeroes));
-
-        sine = _mm_add_ps(sine, _mm_and_ps(_mm_sub_ps(cosine, sine), 
condition1));
-        sine = _mm_sub_ps(sine, _mm_and_ps(_mm_mul_ps(sine, 
_mm_set1_ps(2.0f)), condition2));
-        _mm_storeu_ps(bPtr, sine);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-       *bPtr++ = sin(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for unaligned */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes sine of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which sine is to be computed
-*/
-static inline void volk_32f_sin_32f_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-        *bPtr++ = sin(*aPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32f_sin_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_sqrt_32f.h 
b/volk/kernels/volk/volk_32f_sqrt_32f.h
deleted file mode 100644
index fb70b29..0000000
--- a/volk/kernels/volk/volk_32f_sqrt_32f.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_sqrt_32f_a_H
-#define INCLUDED_volk_32f_sqrt_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Sqrts the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be sqrted
-  \param num_points The number of values in aVector and bVector to be sqrted 
together and stored into cVector
-*/
-static inline void volk_32f_sqrt_32f_a_sse(float* cVector, const float* 
aVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-
-    __m128 aVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-
-      cVal = _mm_sqrt_ps(aVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = sqrtf(*aPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-/*!
-  \brief Sqrts the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be sqrted
-  \param num_points The number of values in aVector and bVector to be sqrted 
together and stored into cVector
-*/
-static inline void volk_32f_sqrt_32f_neon(float* cVector, const float* 
aVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-    unsigned int quarter_points = num_points / 4;
-    float32x4_t in_vec, out_vec;
-
-    for(number = 0; number < quarter_points; number++){
-        in_vec = vld1q_f32(aPtr);
-        // note that armv8 has vsqrt_f32 which will be much better
-        out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec) );
-        vst1q_f32(cPtr, out_vec);
-        aPtr += 4;
-        cPtr += 4;
-    }
-
-    for(number = quarter_points * 4; number < num_points; number++){
-      *cPtr++ = sqrtf(*aPtr++);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Sqrts the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be sqrted
-  \param num_points The number of values in aVector and bVector to be sqrted 
together and stored into cVector
-*/
-static inline void volk_32f_sqrt_32f_generic(float* cVector, const float* 
aVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = sqrtf(*aPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-extern void volk_32f_sqrt_32f_a_orc_impl(float *, const float*, unsigned int);
-/*!
-  \brief Sqrts the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be sqrted
-  \param num_points The number of values in aVector and bVector to be sqrted 
together and stored into cVector
-*/
-static inline void volk_32f_sqrt_32f_u_orc(float* cVector, const float* 
aVector, unsigned int num_points){
-    volk_32f_sqrt_32f_a_orc_impl(cVector, aVector, num_points);
-}
-
-#endif /* LV_HAVE_ORC */
-
-
-
-#endif /* INCLUDED_volk_32f_sqrt_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_stddev_and_mean_32f_x2.h 
b/volk/kernels/volk/volk_32f_stddev_and_mean_32f_x2.h
deleted file mode 100644
index 9d5f709..0000000
--- a/volk/kernels/volk/volk_32f_stddev_and_mean_32f_x2.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
-#define INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Calculates the standard deviation and mean of the input buffer
-  \param stddev The calculated standard deviation
-  \param mean The mean of the input buffer
-  \param inputBuffer The buffer of points to calculate the std deviation for
-  \param num_points The number of values in input buffer to used in the stddev 
and mean calculations
-*/
-static inline void volk_32f_stddev_and_mean_32f_x2_a_sse4_1(float* stddev, 
float* mean, const float* inputBuffer, unsigned int num_points){
-  float returnValue = 0;
-  float newMean = 0;
-  if(num_points > 0){
-    unsigned int number = 0;
-    const unsigned int sixteenthPoints = num_points / 16;
-
-    const float* aPtr = inputBuffer;
-    __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
-    __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
-
-    __m128 accumulator = _mm_setzero_ps();
-    __m128 squareAccumulator = _mm_setzero_ps();
-    __m128 aVal1, aVal2, aVal3, aVal4;
-    __m128 cVal1, cVal2, cVal3, cVal4;
-    for(;number < sixteenthPoints; number++) {
-      aVal1 = _mm_load_ps(aPtr); aPtr += 4;
-      cVal1 = _mm_dp_ps(aVal1, aVal1, 0xF1);
-      accumulator = _mm_add_ps(accumulator, aVal1);  // accumulator += x
-
-      aVal2 = _mm_load_ps(aPtr); aPtr += 4;
-      cVal2 = _mm_dp_ps(aVal2, aVal2, 0xF2);
-      accumulator = _mm_add_ps(accumulator, aVal2);  // accumulator += x
-
-      aVal3 = _mm_load_ps(aPtr); aPtr += 4;
-      cVal3 = _mm_dp_ps(aVal3, aVal3, 0xF4);
-      accumulator = _mm_add_ps(accumulator, aVal3);  // accumulator += x
-
-      aVal4 = _mm_load_ps(aPtr); aPtr += 4;
-      cVal4 = _mm_dp_ps(aVal4, aVal4, 0xF8);
-      accumulator = _mm_add_ps(accumulator, aVal4);  // accumulator += x
-
-      cVal1 = _mm_or_ps(cVal1, cVal2);
-      cVal3 = _mm_or_ps(cVal3, cVal4);
-      cVal1 = _mm_or_ps(cVal1, cVal3);
-
-      squareAccumulator = _mm_add_ps(squareAccumulator, cVal1); // 
squareAccumulator += x^2
-    }
-    _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C 
container
-    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back 
into the C container
-    newMean = meanBuffer[0];
-    newMean += meanBuffer[1];
-    newMean += meanBuffer[2];
-    newMean += meanBuffer[3];
-    returnValue = squareBuffer[0];
-    returnValue += squareBuffer[1];
-    returnValue += squareBuffer[2];
-    returnValue += squareBuffer[3];
-
-    number = sixteenthPoints * 16;
-    for(;number < num_points; number++){
-      returnValue += (*aPtr) * (*aPtr);
-      newMean += *aPtr++;
-    }
-    newMean /= num_points;
-    returnValue /= num_points;
-    returnValue -= (newMean * newMean);
-    returnValue = sqrtf(returnValue);
-  }
-  *stddev = returnValue;
-  *mean = newMean;
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Calculates the standard deviation and mean of the input buffer
-  \param stddev The calculated standard deviation
-  \param mean The mean of the input buffer
-  \param inputBuffer The buffer of points to calculate the std deviation for
-  \param num_points The number of values in input buffer to used in the stddev 
and mean calculations
-*/
-static inline void volk_32f_stddev_and_mean_32f_x2_a_sse(float* stddev, float* 
mean, const float* inputBuffer, unsigned int num_points){
-  float returnValue = 0;
-  float newMean = 0;
-  if(num_points > 0){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* aPtr = inputBuffer;
-    __VOLK_ATTR_ALIGNED(16) float meanBuffer[4];
-    __VOLK_ATTR_ALIGNED(16) float squareBuffer[4];
-
-    __m128 accumulator = _mm_setzero_ps();
-    __m128 squareAccumulator = _mm_setzero_ps();
-    __m128 aVal = _mm_setzero_ps();
-    for(;number < quarterPoints; number++) {
-      aVal = _mm_load_ps(aPtr);                     // aVal = x
-      accumulator = _mm_add_ps(accumulator, aVal);  // accumulator += x
-      aVal = _mm_mul_ps(aVal, aVal);                // squareAccumulator += x^2
-      squareAccumulator = _mm_add_ps(squareAccumulator, aVal);
-      aPtr += 4;
-    }
-    _mm_store_ps(meanBuffer,accumulator); // Store the results back into the C 
container
-    _mm_store_ps(squareBuffer,squareAccumulator); // Store the results back 
into the C container
-    newMean = meanBuffer[0];
-    newMean += meanBuffer[1];
-    newMean += meanBuffer[2];
-    newMean += meanBuffer[3];
-    returnValue = squareBuffer[0];
-    returnValue += squareBuffer[1];
-    returnValue += squareBuffer[2];
-    returnValue += squareBuffer[3];
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      returnValue += (*aPtr) * (*aPtr);
-      newMean += *aPtr++;
-    }
-    newMean /= num_points;
-    returnValue /= num_points;
-    returnValue -= (newMean * newMean);
-    returnValue = sqrtf(returnValue);
-  }
-  *stddev = returnValue;
-  *mean = newMean;
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Calculates the standard deviation and mean of the input buffer
-  \param stddev The calculated standard deviation
-  \param mean The mean of the input buffer
-  \param inputBuffer The buffer of points to calculate the std deviation for
-  \param num_points The number of values in input buffer to used in the stddev 
and mean calculations
-*/
-static inline void volk_32f_stddev_and_mean_32f_x2_generic(float* stddev, 
float* mean, const float* inputBuffer, unsigned int num_points){
-  float returnValue = 0;
-  float newMean = 0;
-  if(num_points > 0){
-    const float* aPtr = inputBuffer;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      returnValue += (*aPtr) * (*aPtr);
-      newMean += *aPtr++;
-    }
-    newMean /= num_points;
-    returnValue /= num_points;
-    returnValue -= (newMean * newMean);
-    returnValue = sqrtf(returnValue);
-  }
-  *stddev = returnValue;
-  *mean = newMean;
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_stddev_and_mean_32f_x2_a_H */
diff --git a/volk/kernels/volk/volk_32f_tan_32f.h 
b/volk/kernels/volk/volk_32f_tan_32f.h
deleted file mode 100644
index 05e52aa..0000000
--- a/volk/kernels/volk/volk_32f_tan_32f.h
+++ /dev/null
@@ -1,220 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <math.h>
-#include <inttypes.h>
-
-#ifndef INCLUDED_volk_32f_tan_32f_a_H
-#define INCLUDED_volk_32f_tan_32f_a_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes tangent of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which tangent is to be computed
-*/
-static inline void volk_32f_tan_32f_a_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    unsigned int i = 0;
-
-    __m128 aVal, s, m4pi, pio4A, pio4B, cp1, cp2, cp3, cp4, cp5, ffours, 
ftwos, fones, fzeroes;
-    __m128 sine, cosine, tangent, condition1, condition2, condition3;
-    __m128i q, r, ones, twos, fours;
-
-    m4pi = _mm_set1_ps(1.273239545);
-    pio4A = _mm_set1_ps(0.78515625);
-    pio4B = _mm_set1_ps(0.241876e-3);
-    ffours = _mm_set1_ps(4.0);
-    ftwos = _mm_set1_ps(2.0);
-    fones = _mm_set1_ps(1.0);
-    fzeroes = _mm_setzero_ps();
-    ones = _mm_set1_epi32(1);
-    twos = _mm_set1_epi32(2);
-    fours = _mm_set1_epi32(4);
-
-    cp1 = _mm_set1_ps(1.0);
-    cp2 = _mm_set1_ps(0.83333333e-1);
-    cp3 = _mm_set1_ps(0.2777778e-2);
-    cp4 = _mm_set1_ps(0.49603e-4);
-    cp5 = _mm_set1_ps(0.551e-6);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_load_ps(aPtr);
-        s = _mm_sub_ps(aVal, _mm_and_ps(_mm_mul_ps(aVal, ftwos), 
_mm_cmplt_ps(aVal, fzeroes)));
-        q = _mm_cvtps_epi32(_mm_mul_ps(s, m4pi));
-        r = _mm_add_epi32(q, _mm_and_si128(q, ones));
-
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4A));
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4B));
-
-        s = _mm_div_ps(s, _mm_set1_ps(8.0));    // The constant is 2^N, for 3 
times argument reduction
-        s = _mm_mul_ps(s, s);
-        // Evaluate Taylor series
-        s = 
_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(s,
 cp5), cp4), s), cp3), s), cp2), s), cp1), s);
-
-        for(i = 0; i < 3; i++){
-            s = _mm_mul_ps(s, _mm_sub_ps(ffours, s));
-        }
-        s = _mm_div_ps(s, ftwos);
-
-        sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s));
-        cosine = _mm_sub_ps(fones, s);
-
-        condition1 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, ones), twos)), 
fzeroes);
-        condition2 = 
_mm_cmpneq_ps(_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(q, fours)), fzeroes), 
_mm_cmplt_ps(aVal, fzeroes));
-        condition3 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, twos), fours)), 
fzeroes);
-
-        __m128 temp = cosine;
-        cosine = _mm_add_ps(cosine, _mm_and_ps(_mm_sub_ps(sine, cosine), 
condition1));
-        sine = _mm_add_ps(sine, _mm_and_ps(_mm_sub_ps(temp, sine), 
condition1));
-        sine = _mm_sub_ps(sine, _mm_and_ps(_mm_mul_ps(sine, 
_mm_set1_ps(2.0f)), condition2));
-        cosine = _mm_sub_ps(cosine, _mm_and_ps(_mm_mul_ps(cosine, 
_mm_set1_ps(2.0f)), condition3));
-        tangent = _mm_div_ps(sine, cosine);
-        _mm_store_ps(bPtr, tangent);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-       *bPtr++ = tan(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#endif /* INCLUDED_volk_32f_tan_32f_a_H */
-
-#ifndef INCLUDED_volk_32f_tan_32f_u_H
-#define INCLUDED_volk_32f_tan_32f_u_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes tangent of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which tangent is to be computed
-*/
-static inline void volk_32f_tan_32f_u_sse4_1(float* bVector, const float* 
aVector, unsigned int num_points){
-
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    unsigned int quarterPoints = num_points / 4;
-    unsigned int i = 0;
-
-    __m128 aVal, s, m4pi, pio4A, pio4B, cp1, cp2, cp3, cp4, cp5, ffours, 
ftwos, fones, fzeroes;
-    __m128 sine, cosine, tangent, condition1, condition2, condition3;
-    __m128i q, r, ones, twos, fours;
-
-    m4pi = _mm_set1_ps(1.273239545);
-    pio4A = _mm_set1_ps(0.78515625);
-    pio4B = _mm_set1_ps(0.241876e-3);
-    ffours = _mm_set1_ps(4.0);
-    ftwos = _mm_set1_ps(2.0);
-    fones = _mm_set1_ps(1.0);
-    fzeroes = _mm_setzero_ps();
-    ones = _mm_set1_epi32(1);
-    twos = _mm_set1_epi32(2);
-    fours = _mm_set1_epi32(4);
-
-    cp1 = _mm_set1_ps(1.0);
-    cp2 = _mm_set1_ps(0.83333333e-1);
-    cp3 = _mm_set1_ps(0.2777778e-2);
-    cp4 = _mm_set1_ps(0.49603e-4);
-    cp5 = _mm_set1_ps(0.551e-6);
-
-    for(;number < quarterPoints; number++){
-        aVal = _mm_loadu_ps(aPtr);
-        s = _mm_sub_ps(aVal, _mm_and_ps(_mm_mul_ps(aVal, ftwos), 
_mm_cmplt_ps(aVal, fzeroes)));
-        q = _mm_cvtps_epi32(_mm_mul_ps(s, m4pi));
-        r = _mm_add_epi32(q, _mm_and_si128(q, ones));
-
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4A));
-        s = _mm_sub_ps(s, _mm_mul_ps(_mm_cvtepi32_ps(r), pio4B));
-
-        s = _mm_div_ps(s, _mm_set1_ps(8.0));    // The constant is 2^N, for 3 
times argument reduction
-        s = _mm_mul_ps(s, s);
-        // Evaluate Taylor series
-        s = 
_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(_mm_add_ps(_mm_mul_ps(_mm_sub_ps(_mm_mul_ps(s,
 cp5), cp4), s), cp3), s), cp2), s), cp1), s);
-
-        for(i = 0; i < 3; i++){
-            s = _mm_mul_ps(s, _mm_sub_ps(ffours, s));
-        }
-        s = _mm_div_ps(s, ftwos);
-
-        sine = _mm_sqrt_ps(_mm_mul_ps(_mm_sub_ps(ftwos, s), s));
-        cosine = _mm_sub_ps(fones, s);
-
-        condition1 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, ones), twos)), 
fzeroes);
-        condition2 = 
_mm_cmpneq_ps(_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(q, fours)), fzeroes), 
_mm_cmplt_ps(aVal, fzeroes));
-        condition3 = 
_mm_cmpneq_ps(_mm_cvtepi32_ps(_mm_and_si128(_mm_add_epi32(q, twos), fours)), 
fzeroes);
-
-        __m128 temp = cosine;
-        cosine = _mm_add_ps(cosine, _mm_and_ps(_mm_sub_ps(sine, cosine), 
condition1));
-        sine = _mm_add_ps(sine, _mm_and_ps(_mm_sub_ps(temp, sine), 
condition1));
-        sine = _mm_sub_ps(sine, _mm_and_ps(_mm_mul_ps(sine, 
_mm_set1_ps(2.0f)), condition2));
-        cosine = _mm_sub_ps(cosine, _mm_and_ps(_mm_mul_ps(cosine, 
_mm_set1_ps(2.0f)), condition3));
-        tangent = _mm_div_ps(sine, cosine);
-        _mm_storeu_ps(bPtr, tangent);
-        aPtr += 4;
-        bPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *bPtr++ = tan(*aPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for unaligned */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes tangent of input vector and stores results in output vector
-  \param bVector The vector where results will be stored
-  \param aVector The input vector of floats
-  \param num_points Number of points for which tangent is to be computed
-*/
-static inline void volk_32f_tan_32f_generic(float* bVector, const float* 
aVector, unsigned int num_points){
-    float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(; number < num_points; number++){
-        *bPtr++ = tan(*aPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32f_tan_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_tanh_32f.h 
b/volk/kernels/volk/volk_32f_tanh_32f.h
deleted file mode 100644
index cdb685f..0000000
--- a/volk/kernels/volk/volk_32f_tanh_32f.h
+++ /dev/null
@@ -1,318 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_tanh_32f_a_H
-#define INCLUDED_volk_32f_tanh_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-#include <string.h>
-
-#ifdef LV_HAVE_GENERIC
-/*!
-\brief Calculates tanh(x)
-\param cVector The vector where the results will be stored
-\param aVector Input vector
-\param num_points The number of values to calulate
-*/
-static inline void volk_32f_tanh_32f_generic(float* cVector, const float* 
aVector,
-                                             unsigned int num_points)
-{
-  unsigned int number = 0;
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-  for(; number < num_points; number++) {
-    *cPtr++ = tanh(*aPtr++);
-  }
-}
-
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-\brief Calculates tanh(x) using a series approximation, good to within 1e-6 of 
the actual tanh.
-\param cVector The vector where the results will be stored
-\param aVector Input vector
-\param num_points The number of values to calulate
-*/
-static inline void volk_32f_tanh_32f_series(float* cVector, const float* 
aVector,
-                                            unsigned int num_points)
-{
-  unsigned int number = 0;
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-  for(; number < num_points; number++) {
-    if(*aPtr > 4.97)
-      *cPtr++ = 1;
-    else if(*aPtr <= -4.97)
-      *cPtr++ = -1;
-    else {
-      float x2 = (*aPtr) * (*aPtr);
-      float a = (*aPtr) * (135135.0f + x2 * (17325.0f + x2 * (378.0f + x2)));
-      float b = 135135.0f + x2 * (62370.0f + x2 * (3150.0f + x2 * 28.0f));
-      *cPtr++ = a / b;
-      aPtr++;
-    }
-  }
-}
-
-#endif /* LV_HAVE_GENERIC */
-
-
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-\brief Calculates tanh(x) using a series approximation, good to within 1e-6 of 
the actual tanh.
-\param cVector The vector where the results will be stored
-\param aVector Input vector
-\param num_points The number of values to calulate
-*/
-static inline void volk_32f_tanh_32f_a_sse(float* cVector, const float* 
aVector,
-                                           unsigned int num_points)
-{
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-
-  __m128 aVal, cVal, x2, a, b;
-  __m128 const1, const2, const3, const4, const5, const6;
-  const1 = _mm_set_ps1(135135.0f);
-  const2 = _mm_set_ps1(17325.0f);
-  const3 = _mm_set_ps1(378.0f);
-  const4 = _mm_set_ps1(62370.0f);
-  const5 = _mm_set_ps1(3150.0f);
-  const6 = _mm_set_ps1(28.0f);
-  for(;number < quarterPoints; number++){
-
-    aVal = _mm_load_ps(aPtr);
-    x2 = _mm_mul_ps(aVal, aVal);
-    a  = _mm_mul_ps(aVal, _mm_add_ps(const1, _mm_mul_ps(x2, _mm_add_ps(const2, 
_mm_mul_ps(x2, _mm_add_ps(const3, x2))))));
-    b  = _mm_add_ps(const1, _mm_mul_ps(x2, _mm_add_ps(const4, _mm_mul_ps(x2, 
_mm_add_ps(const5, _mm_mul_ps(x2, const6))))));
-
-    cVal = _mm_div_ps(a, b);
-
-    _mm_store_ps(cPtr, cVal); // Store the results back into the C container
-
-    aPtr += 4;
-    cPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++) {
-    if(*aPtr > 4.97)
-      *cPtr++ = 1;
-    else if(*aPtr <= -4.97)
-      *cPtr++ = -1;
-    else {
-      float x2 = (*aPtr) * (*aPtr);
-      float a = (*aPtr) * (135135.0f + x2 * (17325.0f + x2 * (378.0f + x2)));
-      float b = 135135.0f + x2 * (62370.0f + x2 * (3150.0f + x2 * 28.0f));
-      *cPtr++ = a / b;
-      aPtr++;
-    }
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-\brief Calculates tanh(x) using a series approximation, good to within 1e-6 of 
the actual tanh.
-\param cVector The vector where the results will be stored
-\param aVector Input vector
-\param num_points The number of values to calulate
-*/
-static inline void volk_32f_tanh_32f_a_avx(float* cVector, const float* 
aVector,
-                                           unsigned int num_points)
-{
-  unsigned int number = 0;
-  const unsigned int eighthPoints = num_points / 8;
-
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-
-  __m256 aVal, cVal, x2, a, b;
-  __m256 const1, const2, const3, const4, const5, const6;
-  const1 = _mm256_set1_ps(135135.0f);
-  const2 = _mm256_set1_ps(17325.0f);
-  const3 = _mm256_set1_ps(378.0f);
-  const4 = _mm256_set1_ps(62370.0f);
-  const5 = _mm256_set1_ps(3150.0f);
-  const6 = _mm256_set1_ps(28.0f);
-  for(;number < eighthPoints; number++){
-
-    aVal = _mm256_load_ps(aPtr);
-    x2 = _mm256_mul_ps(aVal, aVal);
-    a  = _mm256_mul_ps(aVal, _mm256_add_ps(const1, _mm256_mul_ps(x2, 
_mm256_add_ps(const2, _mm256_mul_ps(x2, _mm256_add_ps(const3, x2))))));
-    b  = _mm256_add_ps(const1, _mm256_mul_ps(x2, _mm256_add_ps(const4, 
_mm256_mul_ps(x2, _mm256_add_ps(const5, _mm256_mul_ps(x2, const6))))));
-
-    cVal = _mm256_div_ps(a, b);
-
-    _mm256_store_ps(cPtr, cVal); // Store the results back into the C container
-
-    aPtr += 8;
-    cPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(;number < num_points; number++) {
-    if(*aPtr > 4.97)
-      *cPtr++ = 1;
-    else if(*aPtr <= -4.97)
-      *cPtr++ = -1;
-    else {
-      float x2 = (*aPtr) * (*aPtr);
-      float a = (*aPtr) * (135135.0f + x2 * (17325.0f + x2 * (378.0f + x2)));
-      float b = 135135.0f + x2 * (62370.0f + x2 * (3150.0f + x2 * 28.0f));
-      *cPtr++ = a / b;
-      aPtr++;
-    }
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-
-
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-\brief Calculates tanh(x) using a series approximation, good to within 1e-6 of 
the actual tanh.
-\param cVector The vector where the results will be stored
-\param aVector Input vector
-\param num_points The number of values to calulate
-*/
-static inline void volk_32f_tanh_32f_u_sse(float* cVector, const float* 
aVector,
-                                           unsigned int num_points)
-{
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-
-  __m128 aVal, cVal, x2, a, b;
-  __m128 const1, const2, const3, const4, const5, const6;
-  const1 = _mm_set_ps1(135135.0f);
-  const2 = _mm_set_ps1(17325.0f);
-  const3 = _mm_set_ps1(378.0f);
-  const4 = _mm_set_ps1(62370.0f);
-  const5 = _mm_set_ps1(3150.0f);
-  const6 = _mm_set_ps1(28.0f);
-  for(;number < quarterPoints; number++){
-
-    aVal = _mm_loadu_ps(aPtr);
-    x2 = _mm_mul_ps(aVal, aVal);
-    a  = _mm_mul_ps(aVal, _mm_add_ps(const1, _mm_mul_ps(x2, _mm_add_ps(const2, 
_mm_mul_ps(x2, _mm_add_ps(const3, x2))))));
-    b  = _mm_add_ps(const1, _mm_mul_ps(x2, _mm_add_ps(const4, _mm_mul_ps(x2, 
_mm_add_ps(const5, _mm_mul_ps(x2, const6))))));
-
-    cVal = _mm_div_ps(a, b);
-
-    _mm_storeu_ps(cPtr, cVal); // Store the results back into the C container
-
-    aPtr += 4;
-    cPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(;number < num_points; number++) {
-    if(*aPtr > 4.97)
-      *cPtr++ = 1;
-    else if(*aPtr <= -4.97)
-      *cPtr++ = -1;
-    else {
-      float x2 = (*aPtr) * (*aPtr);
-      float a = (*aPtr) * (135135.0f + x2 * (17325.0f + x2 * (378.0f + x2)));
-      float b = 135135.0f + x2 * (62370.0f + x2 * (3150.0f + x2 * 28.0f));
-      *cPtr++ = a / b;
-      aPtr++;
-    }
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-\brief Calculates tanh(x) using a series approximation, good to within 1e-6 of 
the actual tanh.
-\param cVector The vector where the results will be stored
-\param aVector Input vector
-\param num_points The number of values to calulate
-*/
-static inline void volk_32f_tanh_32f_u_avx(float* cVector, const float* 
aVector,
-                                           unsigned int num_points)
-{
-  unsigned int number = 0;
-  const unsigned int eighthPoints = num_points / 8;
-
-  float* cPtr = cVector;
-  const float* aPtr = aVector;
-
-  __m256 aVal, cVal, x2, a, b;
-  __m256 const1, const2, const3, const4, const5, const6;
-  const1 = _mm256_set1_ps(135135.0f);
-  const2 = _mm256_set1_ps(17325.0f);
-  const3 = _mm256_set1_ps(378.0f);
-  const4 = _mm256_set1_ps(62370.0f);
-  const5 = _mm256_set1_ps(3150.0f);
-  const6 = _mm256_set1_ps(28.0f);
-  for(;number < eighthPoints; number++){
-
-    aVal = _mm256_loadu_ps(aPtr);
-    x2 = _mm256_mul_ps(aVal, aVal);
-    a  = _mm256_mul_ps(aVal, _mm256_add_ps(const1, _mm256_mul_ps(x2, 
_mm256_add_ps(const2, _mm256_mul_ps(x2, _mm256_add_ps(const3, x2))))));
-    b  = _mm256_add_ps(const1, _mm256_mul_ps(x2, _mm256_add_ps(const4, 
_mm256_mul_ps(x2, _mm256_add_ps(const5, _mm256_mul_ps(x2, const6))))));
-
-    cVal = _mm256_div_ps(a, b);
-
-    _mm256_storeu_ps(cPtr, cVal); // Store the results back into the C 
container
-
-    aPtr += 8;
-    cPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(;number < num_points; number++) {
-    if(*aPtr > 4.97)
-      *cPtr++ = 1;
-    else if(*aPtr <= -4.97)
-      *cPtr++ = -1;
-    else {
-      float x2 = (*aPtr) * (*aPtr);
-      float a = (*aPtr) * (135135.0f + x2 * (17325.0f + x2 * (378.0f + x2)));
-      float b = 135135.0f + x2 * (62370.0f + x2 * (3150.0f + x2 * 28.0f));
-      *cPtr++ = a / b;
-      aPtr++;
-    }
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#endif /* INCLUDED_volk_32f_tanh_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_x2_add_32f.h 
b/volk/kernels/volk/volk_32f_x2_add_32f.h
deleted file mode 100644
index 0c66b27..0000000
--- a/volk/kernels/volk/volk_32f_x2_add_32f.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_add_32f_u_H
-#define INCLUDED_volk_32f_x2_add_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Adds the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be added
-  \param bVector One of the vectors to be added
-  \param num_points The number of values in aVector and bVector to be added 
together and stored into cVector
-*/
-static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_loadu_ps(aPtr);
-      bVal = _mm_loadu_ps(bPtr);
-
-      cVal = _mm_add_ps(aVal, bVal);
-
-      _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) + (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Adds the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be added
-  \param bVector One of the vectors to be added
-  \param num_points The number of values in aVector and bVector to be added 
together and stored into cVector
-*/
-static inline void volk_32f_x2_add_32f_generic(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) + (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32f_x2_add_32f_u_H */
-#ifndef INCLUDED_volk_32f_x2_add_32f_a_H
-#define INCLUDED_volk_32f_x2_add_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Adds the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be added
-  \param bVector One of the vectors to be added
-  \param num_points The number of values in aVector and bVector to be added 
together and stored into cVector
-*/
-static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-      bVal = _mm_load_ps(bPtr);
-
-      cVal = _mm_add_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) + (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*
-  \brief Adds the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be added
-  \param bVector One of the vectors to be added
-  \param num_points The number of values in aVector and bVector to be added 
together and stored into cVector
-*/
-static inline void volk_32f_x2_add_32f_u_neon(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points) {
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    float32x4_t aVal, bVal, cVal;
-    for(number=0; number < quarterPoints; number++){
-      // Load in to NEON registers
-      aVal = vld1q_f32(aPtr);
-      bVal = vld1q_f32(bPtr);
-      __builtin_prefetch(aPtr+4);
-      __builtin_prefetch(bPtr+4);
-
-      // vector add
-      cVal = vaddq_f32(aVal, bVal);
-      // Store the results back into the C container
-      vst1q_f32(cPtr,cVal);
-
-      aPtr += 4; // q uses quadwords, 4 floats per vadd
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4; // should be = num_points
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) + (*bPtr++);
-    }
-
-}
-
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Adds the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be added
-  \param bVector One of the vectors to be added
-  \param num_points The number of values in aVector and bVector to be added 
together and stored into cVector
-*/
-static inline void volk_32f_x2_add_32f_a_generic(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) + (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Adds the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be added
-  \param bVector One of the vectors to be added
-  \param num_points The number of values in aVector and bVector to be added 
together and stored into cVector
-*/
-extern void volk_32f_x2_add_32f_a_orc_impl(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_add_32f_u_orc(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32f_x2_add_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_x2_divide_32f.h 
b/volk/kernels/volk/volk_32f_x2_divide_32f.h
deleted file mode 100644
index bbb5cbc..0000000
--- a/volk/kernels/volk/volk_32f_x2_divide_32f.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_divide_32f_a_H
-#define INCLUDED_volk_32f_x2_divide_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Divides the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be divideed
-  \param bVector The divisor vector
-  \param num_points The number of values in aVector and bVector to be divideed 
together and stored into cVector
-*/
-static inline void volk_32f_x2_divide_32f_a_sse(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-      bVal = _mm_load_ps(bPtr);
-
-      cVal = _mm_div_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) / (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Divides the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be divideed
-  \param bVector The divisor vector
-  \param num_points The number of values in aVector and bVector to be divideed 
together and stored into cVector
-*/
-static inline void volk_32f_x2_divide_32f_generic(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) / (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Divides the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be divideed
-  \param bVector The divisor vector
-  \param num_points The number of values in aVector and bVector to be divideed 
together and stored into cVector
-*/
-extern void volk_32f_x2_divide_32f_a_orc_impl(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_divide_32f_u_orc(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_divide_32f_a_orc_impl(cVector, aVector, bVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-
-#endif /* INCLUDED_volk_32f_x2_divide_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_x2_dot_prod_16i.h 
b/volk/kernels/volk/volk_32f_x2_dot_prod_16i.h
deleted file mode 100644
index 6e38e33..0000000
--- a/volk/kernels/volk/volk_32f_x2_dot_prod_16i.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_dot_prod_16i_H
-#define INCLUDED_volk_32f_x2_dot_prod_16i_H
-
-#include <volk/volk_common.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_32f_x2_dot_prod_16i_generic(int16_t* result, const 
float* input, const float* taps, unsigned int num_points) {
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr=  taps;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = (int16_t)dotProduct;
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#ifdef LV_HAVE_SSE
-
-static inline void volk_32f_x2_dot_prod_16i_a_sse(int16_t* result, const  
float* input, const  float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm_load_ps(aPtr);
-    a1Val = _mm_load_ps(aPtr+4);
-    a2Val = _mm_load_ps(aPtr+8);
-    a3Val = _mm_load_ps(aPtr+12);
-    b0Val = _mm_load_ps(bPtr);
-    b1Val = _mm_load_ps(bPtr+4);
-    b2Val = _mm_load_ps(bPtr+8);
-    b3Val = _mm_load_ps(bPtr+12);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 16;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = (short)dotProduct;
-}
-
-#endif /*LV_HAVE_SSE*/
-
-
-#ifdef LV_HAVE_SSE
-
-static inline void volk_32f_x2_dot_prod_16i_u_sse(int16_t* result, const  
float* input, const  float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm_loadu_ps(aPtr);
-    a1Val = _mm_loadu_ps(aPtr+4);
-    a2Val = _mm_loadu_ps(aPtr+8);
-    a3Val = _mm_loadu_ps(aPtr+12);
-    b0Val = _mm_loadu_ps(bPtr);
-    b1Val = _mm_loadu_ps(bPtr+4);
-    b2Val = _mm_loadu_ps(bPtr+8);
-    b3Val = _mm_loadu_ps(bPtr+12);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 16;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = (short)dotProduct;
-}
-
-#endif /*LV_HAVE_SSE*/
-
-#endif /*INCLUDED_volk_32f_x2_dot_prod_16i_H*/
diff --git a/volk/kernels/volk/volk_32f_x2_dot_prod_32f.h 
b/volk/kernels/volk/volk_32f_x2_dot_prod_32f.h
deleted file mode 100644
index a98802b..0000000
--- a/volk/kernels/volk/volk_32f_x2_dot_prod_32f.h
+++ /dev/null
@@ -1,692 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_u_H
-#define INCLUDED_volk_32f_x2_dot_prod_32f_u_H
-
-#include <volk/volk_common.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_32f_x2_dot_prod_32f_generic(float * result, const 
float * input, const float * taps, unsigned int num_points) {
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr=  taps;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#ifdef LV_HAVE_SSE
-
-
-static inline void volk_32f_x2_dot_prod_32f_u_sse( float* result, const  
float* input, const  float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm_loadu_ps(aPtr);
-    a1Val = _mm_loadu_ps(aPtr+4);
-    a2Val = _mm_loadu_ps(aPtr+8);
-    a3Val = _mm_loadu_ps(aPtr+12);
-    b0Val = _mm_loadu_ps(bPtr);
-    b1Val = _mm_loadu_ps(bPtr+4);
-    b2Val = _mm_loadu_ps(bPtr+8);
-    b3Val = _mm_loadu_ps(bPtr+12);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 16;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-
-}
-
-#endif /*LV_HAVE_SSE*/
-
-#ifdef LV_HAVE_SSE3
-
-#include <pmmintrin.h>
-
-static inline void volk_32f_x2_dot_prod_32f_u_sse3(float * result, const float 
* input, const float * taps, unsigned int num_points) {
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm_loadu_ps(aPtr);
-    a1Val = _mm_loadu_ps(aPtr+4);
-    a2Val = _mm_loadu_ps(aPtr+8);
-    a3Val = _mm_loadu_ps(aPtr+12);
-    b0Val = _mm_loadu_ps(bPtr);
-    b1Val = _mm_loadu_ps(bPtr+4);
-    b2Val = _mm_loadu_ps(bPtr+8);
-    b3Val = _mm_loadu_ps(bPtr+12);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(dotProdVal0, c0Val);
-    dotProdVal1 = _mm_add_ps(dotProdVal1, c1Val);
-    dotProdVal2 = _mm_add_ps(dotProdVal2, c2Val);
-    dotProdVal3 = _mm_add_ps(dotProdVal3, c3Val);
-
-    aPtr += 16;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-#ifdef LV_HAVE_SSE4_1
-
-#include <smmintrin.h>
-
-static inline void volk_32f_x2_dot_prod_32f_u_sse4_1(float * result, const 
float * input, const float* taps, unsigned int num_points) {
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m128 aVal1, bVal1, cVal1;
-  __m128 aVal2, bVal2, cVal2;
-  __m128 aVal3, bVal3, cVal3;
-  __m128 aVal4, bVal4, cVal4;
-
-  __m128 dotProdVal = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    aVal1 = _mm_loadu_ps(aPtr); aPtr += 4;
-    aVal2 = _mm_loadu_ps(aPtr); aPtr += 4;
-    aVal3 = _mm_loadu_ps(aPtr); aPtr += 4;
-    aVal4 = _mm_loadu_ps(aPtr); aPtr += 4;
-
-    bVal1 = _mm_loadu_ps(bPtr); bPtr += 4;
-    bVal2 = _mm_loadu_ps(bPtr); bPtr += 4;
-    bVal3 = _mm_loadu_ps(bPtr); bPtr += 4;
-    bVal4 = _mm_loadu_ps(bPtr); bPtr += 4;
-
-    cVal1 = _mm_dp_ps(aVal1, bVal1, 0xF1);
-    cVal2 = _mm_dp_ps(aVal2, bVal2, 0xF2);
-    cVal3 = _mm_dp_ps(aVal3, bVal3, 0xF4);
-    cVal4 = _mm_dp_ps(aVal4, bVal4, 0xF8);
-
-    cVal1 = _mm_or_ps(cVal1, cVal2);
-    cVal3 = _mm_or_ps(cVal3, cVal4);
-    cVal1 = _mm_or_ps(cVal1, cVal3);
-
-    dotProdVal = _mm_add_ps(dotProdVal, cVal1);
-  }
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-  _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into 
the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-
-  number = sixteenthPoints * 16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_SSE4_1*/
-
-#ifdef LV_HAVE_AVX
-
-#include <immintrin.h>
-
-static inline void volk_32f_x2_dot_prod_32f_u_avx( float* result, const  
float* input, const  float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m256 a0Val, a1Val;
-  __m256 b0Val, b1Val;
-  __m256 c0Val, c1Val;
-
-  __m256 dotProdVal0 = _mm256_setzero_ps();
-  __m256 dotProdVal1 = _mm256_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm256_loadu_ps(aPtr);
-    a1Val = _mm256_loadu_ps(aPtr+8);
-    b0Val = _mm256_loadu_ps(bPtr);
-    b1Val = _mm256_loadu_ps(bPtr+8);
-
-    c0Val = _mm256_mul_ps(a0Val, b0Val);
-    c1Val = _mm256_mul_ps(a1Val, b1Val);
-
-    dotProdVal0 = _mm256_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm256_add_ps(c1Val, dotProdVal1);
-
-    aPtr += 16;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal1);
-
-  __VOLK_ATTR_ALIGNED(32) float dotProductVector[8];
-
-  _mm256_storeu_ps(dotProductVector,dotProdVal0); // Store the results back 
into the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-  dotProduct += dotProductVector[4];
-  dotProduct += dotProductVector[5];
-  dotProduct += dotProductVector[6];
-  dotProduct += dotProductVector[7];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-
-}
-
-#endif /*LV_HAVE_AVX*/
-
-#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_u_H*/
-#ifndef INCLUDED_volk_32f_x2_dot_prod_32f_a_H
-#define INCLUDED_volk_32f_x2_dot_prod_32f_a_H
-
-#include <volk/volk_common.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_32f_x2_dot_prod_32f_a_generic(float * result, const 
float * input, const float * taps, unsigned int num_points) {
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr=  taps;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#ifdef LV_HAVE_SSE
-
-
-static inline void volk_32f_x2_dot_prod_32f_a_sse( float* result, const  
float* input, const  float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm_load_ps(aPtr);
-    a1Val = _mm_load_ps(aPtr+4);
-    a2Val = _mm_load_ps(aPtr+8);
-    a3Val = _mm_load_ps(aPtr+12);
-    b0Val = _mm_load_ps(bPtr);
-    b1Val = _mm_load_ps(bPtr+4);
-    b2Val = _mm_load_ps(bPtr+8);
-    b3Val = _mm_load_ps(bPtr+12);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 16;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-
-}
-
-#endif /*LV_HAVE_SSE*/
-
-#ifdef LV_HAVE_SSE3
-
-#include <pmmintrin.h>
-
-static inline void volk_32f_x2_dot_prod_32f_a_sse3(float * result, const float 
* input, const float * taps, unsigned int num_points) {
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm_load_ps(aPtr);
-    a1Val = _mm_load_ps(aPtr+4);
-    a2Val = _mm_load_ps(aPtr+8);
-    a3Val = _mm_load_ps(aPtr+12);
-    b0Val = _mm_load_ps(bPtr);
-    b1Val = _mm_load_ps(bPtr+4);
-    b2Val = _mm_load_ps(bPtr+8);
-    b3Val = _mm_load_ps(bPtr+12);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(dotProdVal0, c0Val);
-    dotProdVal1 = _mm_add_ps(dotProdVal1, c1Val);
-    dotProdVal2 = _mm_add_ps(dotProdVal2, c2Val);
-    dotProdVal3 = _mm_add_ps(dotProdVal3, c3Val);
-
-    aPtr += 16;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-#ifdef LV_HAVE_SSE4_1
-
-#include <smmintrin.h>
-
-static inline void volk_32f_x2_dot_prod_32f_a_sse4_1(float * result, const 
float * input, const float* taps, unsigned int num_points) {
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m128 aVal1, bVal1, cVal1;
-  __m128 aVal2, bVal2, cVal2;
-  __m128 aVal3, bVal3, cVal3;
-  __m128 aVal4, bVal4, cVal4;
-
-  __m128 dotProdVal = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    aVal1 = _mm_load_ps(aPtr); aPtr += 4;
-    aVal2 = _mm_load_ps(aPtr); aPtr += 4;
-    aVal3 = _mm_load_ps(aPtr); aPtr += 4;
-    aVal4 = _mm_load_ps(aPtr); aPtr += 4;
-
-    bVal1 = _mm_load_ps(bPtr); bPtr += 4;
-    bVal2 = _mm_load_ps(bPtr); bPtr += 4;
-    bVal3 = _mm_load_ps(bPtr); bPtr += 4;
-    bVal4 = _mm_load_ps(bPtr); bPtr += 4;
-
-    cVal1 = _mm_dp_ps(aVal1, bVal1, 0xF1);
-    cVal2 = _mm_dp_ps(aVal2, bVal2, 0xF2);
-    cVal3 = _mm_dp_ps(aVal3, bVal3, 0xF4);
-    cVal4 = _mm_dp_ps(aVal4, bVal4, 0xF8);
-
-    cVal1 = _mm_or_ps(cVal1, cVal2);
-    cVal3 = _mm_or_ps(cVal3, cVal4);
-    cVal1 = _mm_or_ps(cVal1, cVal3);
-
-    dotProdVal = _mm_add_ps(dotProdVal, cVal1);
-  }
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-  _mm_store_ps(dotProductVector, dotProdVal); // Store the results back into 
the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-
-  number = sixteenthPoints * 16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_SSE4_1*/
-
-#ifdef LV_HAVE_AVX
-
-#include <immintrin.h>
-
-static inline void volk_32f_x2_dot_prod_32f_a_avx( float* result, const  
float* input, const  float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float dotProduct = 0;
-  const float* aPtr = input;
-  const float* bPtr = taps;
-
-  __m256 a0Val, a1Val;
-  __m256 b0Val, b1Val;
-  __m256 c0Val, c1Val;
-
-  __m256 dotProdVal0 = _mm256_setzero_ps();
-  __m256 dotProdVal1 = _mm256_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm256_load_ps(aPtr);
-    a1Val = _mm256_load_ps(aPtr+8);
-    b0Val = _mm256_load_ps(bPtr);
-    b1Val = _mm256_load_ps(bPtr+8);
-
-    c0Val = _mm256_mul_ps(a0Val, b0Val);
-    c1Val = _mm256_mul_ps(a1Val, b1Val);
-
-    dotProdVal0 = _mm256_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm256_add_ps(c1Val, dotProdVal1);
-
-    aPtr += 16;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal1);
-
-  __VOLK_ATTR_ALIGNED(32) float dotProductVector[8];
-
-  _mm256_store_ps(dotProductVector,dotProdVal0); // Store the results back 
into the dot product vector
-
-  dotProduct = dotProductVector[0];
-  dotProduct += dotProductVector[1];
-  dotProduct += dotProductVector[2];
-  dotProduct += dotProductVector[3];
-  dotProduct += dotProductVector[4];
-  dotProduct += dotProductVector[5];
-  dotProduct += dotProductVector[6];
-  dotProduct += dotProductVector[7];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    dotProduct += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = dotProduct;
-
-}
-
-#endif /*LV_HAVE_AVX*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-static inline void volk_32f_x2_dot_prod_32f_neonopts(float * result, const 
float * input, const float * taps, unsigned int num_points) {
-
-    unsigned int quarter_points = num_points / 16;
-    float dotProduct = 0;
-    const float* aPtr = input;
-    const float* bPtr=  taps;
-    unsigned int number = 0;
-
-    float32x4x4_t a_val, b_val, accumulator0;
-    accumulator0.val[0] = vdupq_n_f32(0);
-    accumulator0.val[1] = vdupq_n_f32(0);
-    accumulator0.val[2] = vdupq_n_f32(0);
-    accumulator0.val[3] = vdupq_n_f32(0);
-    // factor of 4 loop unroll with independent accumulators
-    // uses 12 out of 16 neon q registers
-    for( number = 0; number < quarter_points; ++number) {
-        a_val = vld4q_f32(aPtr);
-        b_val = vld4q_f32(bPtr);
-        accumulator0.val[0] = vmlaq_f32(accumulator0.val[0], a_val.val[0], 
b_val.val[0]);
-        accumulator0.val[1] = vmlaq_f32(accumulator0.val[1], a_val.val[1], 
b_val.val[1]);
-        accumulator0.val[2] = vmlaq_f32(accumulator0.val[2], a_val.val[2], 
b_val.val[2]);
-        accumulator0.val[3] = vmlaq_f32(accumulator0.val[3], a_val.val[3], 
b_val.val[3]);
-        aPtr += 16;
-        bPtr += 16;
-    }
-    accumulator0.val[0] = vaddq_f32(accumulator0.val[0], accumulator0.val[1]);
-    accumulator0.val[2] = vaddq_f32(accumulator0.val[2], accumulator0.val[3]);
-    accumulator0.val[0] = vaddq_f32(accumulator0.val[2], accumulator0.val[0]);
-    __VOLK_ATTR_ALIGNED(32) float accumulator[4];
-    vst1q_f32(accumulator, accumulator0.val[0]);
-    dotProduct = accumulator[0] + accumulator[1] + accumulator[2] + 
accumulator[3];
-
-    for(number = quarter_points*16; number < num_points; number++){
-      dotProduct += ((*aPtr++) * (*bPtr++));
-    }
-
-    *result = dotProduct;
-}
-
-#endif
-
-
-
-
-#ifdef LV_HAVE_NEON
-static inline void volk_32f_x2_dot_prod_32f_neon(float * result, const float * 
input, const float * taps, unsigned int num_points) {
-
-    unsigned int quarter_points = num_points / 8;
-    float dotProduct = 0;
-    const float* aPtr = input;
-    const float* bPtr=  taps;
-    unsigned int number = 0;
-
-    float32x4x2_t a_val, b_val, accumulator_val;
-    accumulator_val.val[0] = vdupq_n_f32(0);
-    accumulator_val.val[1] = vdupq_n_f32(0);
-    // factor of 2 loop unroll with independent accumulators
-    for( number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32(aPtr);
-        b_val = vld2q_f32(bPtr);
-        accumulator_val.val[0] = vmlaq_f32(accumulator_val.val[0], 
a_val.val[0], b_val.val[0]);
-        accumulator_val.val[1] = vmlaq_f32(accumulator_val.val[1], 
a_val.val[1], b_val.val[1]);
-        aPtr += 8;
-        bPtr += 8;
-    }
-    accumulator_val.val[0] = vaddq_f32(accumulator_val.val[0], 
accumulator_val.val[1]);
-    __VOLK_ATTR_ALIGNED(32) float accumulator[4];
-    vst1q_f32(accumulator, accumulator_val.val[0]);
-    dotProduct = accumulator[0] + accumulator[1] + accumulator[2] + 
accumulator[3];
-
-    for(number = quarter_points*8; number < num_points; number++){
-      dotProduct += ((*aPtr++) * (*bPtr++));
-    }
-
-    *result = dotProduct;
-}
-
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_NEON
-extern void volk_32f_x2_dot_prod_32f_neonasm(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points);
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_NEON
-extern void volk_32f_x2_dot_prod_32f_neonasm_opts(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points);
-#endif /* LV_HAVE_NEON */
-
-#endif /*INCLUDED_volk_32f_x2_dot_prod_32f_a_H*/
diff --git a/volk/kernels/volk/volk_32f_x2_interleave_32fc.h 
b/volk/kernels/volk/volk_32f_x2_interleave_32fc.h
deleted file mode 100644
index bf7e925..0000000
--- a/volk/kernels/volk/volk_32f_x2_interleave_32fc.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_interleave_32fc_a_H
-#define INCLUDED_volk_32f_x2_interleave_32fc_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Interleaves the I & Q vector data into the complex vector
-  \param iBuffer The I buffer data to be interleaved
-  \param qBuffer The Q buffer data to be interleaved
-  \param complexVector The complex output vector
-  \param num_points The number of complex data values to be interleaved
-*/
-static inline void volk_32f_x2_interleave_32fc_a_sse(lv_32fc_t* complexVector, 
const float* iBuffer, const float* qBuffer, unsigned int num_points){
-  unsigned int number = 0;
-  float* complexVectorPtr = (float*)complexVector;
-  const float* iBufferPtr = iBuffer;
-  const float* qBufferPtr = qBuffer;
-
-  const uint64_t quarterPoints = num_points / 4;
-
-  __m128 iValue, qValue, cplxValue;
-  for(;number < quarterPoints; number++){
-    iValue = _mm_load_ps(iBufferPtr);
-    qValue = _mm_load_ps(qBufferPtr);
-
-    // Interleaves the lower two values in the i and q variables into one 
buffer
-    cplxValue = _mm_unpacklo_ps(iValue, qValue);
-    _mm_store_ps(complexVectorPtr, cplxValue);
-    complexVectorPtr += 4;
-
-    // Interleaves the upper two values in the i and q variables into one 
buffer
-    cplxValue = _mm_unpackhi_ps(iValue, qValue);
-    _mm_store_ps(complexVectorPtr, cplxValue);
-    complexVectorPtr += 4;
-
-    iBufferPtr += 4;
-    qBufferPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    *complexVectorPtr++ = *iBufferPtr++;
-    *complexVectorPtr++ = *qBufferPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Interleaves the I & Q vector data into the complex vector.
-  \param iBuffer The I buffer data to be interleaved
-  \param qBuffer The Q buffer data to be interleaved
-  \param complexVector The complex output vector
-  \param num_points The number of complex data values to be interleaved
-*/
-static inline void volk_32f_x2_interleave_32fc_neon(lv_32fc_t* complexVector, 
const float* iBuffer, const float* qBuffer, unsigned int num_points){
-    unsigned int quarter_points = num_points / 4;
-    unsigned int number;
-    float* complexVectorPtr = (float*) complexVector;
-
-    float32x4x2_t complex_vec;
-    for(number=0; number < quarter_points; ++number) {
-        complex_vec.val[0] = vld1q_f32(iBuffer);
-        complex_vec.val[1] = vld1q_f32(qBuffer);
-        vst2q_f32(complexVectorPtr, complex_vec);
-        iBuffer += 4;
-        qBuffer += 4;
-        complexVectorPtr += 8;
-    }
-
-    for(number=quarter_points * 4; number < num_points; ++number) {
-        *complexVectorPtr++ = *iBuffer++;
-        *complexVectorPtr++ = *qBuffer++;
-    }
-
-}
-#endif /* LV_HAVE_NEON */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Interleaves the I & Q vector data into the complex vector.
-  \param iBuffer The I buffer data to be interleaved
-  \param qBuffer The Q buffer data to be interleaved
-  \param complexVector The complex output vector
-  \param num_points The number of complex data values to be interleaved
-*/
-static inline void volk_32f_x2_interleave_32fc_generic(lv_32fc_t* 
complexVector, const float* iBuffer, const float* qBuffer, unsigned int 
num_points){
-  float* complexVectorPtr = (float*)complexVector;
-  const float* iBufferPtr = iBuffer;
-  const float* qBufferPtr = qBuffer;
-  unsigned int number;
-
-  for(number = 0; number < num_points; number++){
-    *complexVectorPtr++ = *iBufferPtr++;
-    *complexVectorPtr++ = *qBufferPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_x2_interleave_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32f_x2_max_32f.h 
b/volk/kernels/volk/volk_32f_x2_max_32f.h
deleted file mode 100644
index 7e4e2af..0000000
--- a/volk/kernels/volk/volk_32f_x2_max_32f.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_max_32f_a_H
-#define INCLUDED_volk_32f_x2_max_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Selects maximum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_32f_x2_max_32f_a_sse(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-      bVal = _mm_load_ps(bPtr);
-
-      cVal = _mm_max_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      const float a = *aPtr++;
-      const float b = *bPtr++;
-      *cPtr++ = ( a > b ? a : b);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Selects maximum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_32f_x2_max_32f_neon(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int quarter_points = num_points / 4;
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    float32x4_t a_vec, b_vec, c_vec;
-    for(number = 0; number < quarter_points; number++){
-        a_vec = vld1q_f32(aPtr);
-        b_vec = vld1q_f32(bPtr);
-        c_vec = vmaxq_f32(a_vec, b_vec);
-        vst1q_f32(cPtr, c_vec);
-        aPtr += 4;
-        bPtr += 4;
-        cPtr += 4;
-    }
-
-    for(number = quarter_points*4; number < num_points; number++){
-        const float a = *aPtr++;
-        const float b = *bPtr++;
-        *cPtr++ = ( a > b ? a : b);
-    }
-
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Selects maximum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_32f_x2_max_32f_generic(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      const float a = *aPtr++;
-      const float b = *bPtr++;
-      *cPtr++ = ( a > b ? a : b);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Selects maximum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-extern void volk_32f_x2_max_32f_a_orc_impl(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_max_32f_u_orc(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_max_32f_a_orc_impl(cVector, aVector, bVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32f_x2_max_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_x2_min_32f.h 
b/volk/kernels/volk/volk_32f_x2_min_32f.h
deleted file mode 100644
index b67c7b8..0000000
--- a/volk/kernels/volk/volk_32f_x2_min_32f.h
+++ /dev/null
@@ -1,145 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_min_32f_a_H
-#define INCLUDED_volk_32f_x2_min_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Selects minimum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_32f_x2_min_32f_a_sse(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-      bVal = _mm_load_ps(bPtr);
-
-      cVal = _mm_min_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      const float a = *aPtr++;
-      const float b = *bPtr++;
-      *cPtr++ = ( a < b ? a : b);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-/*!
-  \brief Selects minimum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_32f_x2_min_32f_neon(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-    unsigned int quarter_points = num_points / 4;
-
-    float32x4_t a_vec, b_vec, c_vec;
-    for(number = 0; number < quarter_points; number++){
-        a_vec = vld1q_f32(aPtr);
-        b_vec = vld1q_f32(bPtr);
-
-        c_vec = vminq_f32(a_vec, b_vec);
-
-        vst1q_f32(cPtr, c_vec);
-        aPtr += 4;
-        bPtr += 4;
-        cPtr += 4;
-    }
-
-    for(number = quarter_points*4; number < num_points; number++){
-      const float a = *aPtr++;
-      const float b = *bPtr++;
-      *cPtr++ = ( a < b ? a : b);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Selects minimum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_32f_x2_min_32f_generic(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      const float a = *aPtr++;
-      const float b = *bPtr++;
-      *cPtr++ = ( a < b ? a : b);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Selects minimum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_min_32f_u_orc(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32f_x2_min_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_x2_multiply_32f.h 
b/volk/kernels/volk/volk_32f_x2_multiply_32f.h
deleted file mode 100644
index b054215..0000000
--- a/volk/kernels/volk/volk_32f_x2_multiply_32f.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
-#define INCLUDED_volk_32f_x2_multiply_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Multiplys the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param bVector One of the vectors to be multiplied
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_loadu_ps(aPtr);
-      bVal = _mm_loadu_ps(bPtr);
-
-      cVal = _mm_mul_ps(aVal, bVal);
-
-      _mm_storeu_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Multiplies the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param bVector One of the vectors to be multiplied
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m256 aVal, bVal, cVal;
-    for(;number < eighthPoints; number++){
-
-      aVal = _mm256_loadu_ps(aPtr);
-      bVal = _mm256_loadu_ps(bPtr);
-
-      cVal = _mm256_mul_ps(aVal, bVal);
-
-      _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C 
container
-
-      aPtr += 8;
-      bPtr += 8;
-      cPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Multiplys the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param bVector One of the vectors to be multiplied
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_x2_multiply_32f_generic(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
-#ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
-#define INCLUDED_volk_32f_x2_multiply_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Multiplys the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param bVector One of the vectors to be multiplied
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-      bVal = _mm_load_ps(bPtr);
-
-      cVal = _mm_mul_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Multiplies the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param bVector One of the vectors to be multiplied
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m256 aVal, bVal, cVal;
-    for(;number < eighthPoints; number++){
-
-      aVal = _mm256_load_ps(aPtr);
-      bVal = _mm256_load_ps(bPtr);
-
-      cVal = _mm256_mul_ps(aVal, bVal);
-
-      _mm256_store_ps(cPtr,cVal); // Store the results back into the C 
container
-
-      aPtr += 8;
-      bPtr += 8;
-      cPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-/*!
-  \brief Multiplys the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param bVector One of the vectors to be multiplied
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_x2_multiply_32f_neon(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    const unsigned int quarter_points = num_points / 4;
-    unsigned int number;
-    float32x4_t avec, bvec, cvec;
-    for(number=0; number < quarter_points; ++number) {
-        avec = vld1q_f32(aVector);
-        bvec = vld1q_f32(bVector);
-        cvec = vmulq_f32(avec, bvec);
-        vst1q_f32(cVector, cvec);
-        aVector += 4;
-        bVector += 4;
-        cVector += 4;
-    }
-    for(number=quarter_points*4; number < num_points; ++number) {
-        *cVector++ = *aVector++ * *bVector++;
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Multiplys the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param bVector One of the vectors to be multiplied
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Multiplys the two input vectors and store their results in the third 
vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be multiplied
-  \param bVector One of the vectors to be multiplied
-  \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_x2_pow_32f.h 
b/volk/kernels/volk/volk_32f_x2_pow_32f.h
deleted file mode 100644
index 431c4c7..0000000
--- a/volk/kernels/volk/volk_32f_x2_pow_32f.h
+++ /dev/null
@@ -1,298 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <math.h>
-
-#define POLY0(x, c0) _mm_set1_ps(c0)
-#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), 
_mm_set1_ps(c0))
-#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), 
_mm_set1_ps(c0))
-#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), 
x), _mm_set1_ps(c0))
-#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, 
c3, c4), x), _mm_set1_ps(c0))
-#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, 
c2, c3, c4, c5), x), _mm_set1_ps(c0))
-
-#define LOG_POLY_DEGREE 3
-
-#ifndef INCLUDED_volk_32f_x2_pow_32f_a_H
-#define INCLUDED_volk_32f_x2_pow_32f_a_H
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes pow(x,y) by using exp and log
-  \param cVector The vector where results will be stored
-  \param aVector The input vector of bases
-  \param bVector The input vector of indices
-  \param num_points Number of points for which pow is to be computed
-*/
-static inline void volk_32f_x2_pow_32f_a_sse4_1(float* cVector, const float* 
bVector, const float* aVector, unsigned int num_points){
-
-    float* cPtr = cVector;
-    const float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m128 aVal, bVal, cVal, logarithm, mantissa, frac, leadingOne;
-    __m128 tmp, fx, mask, pow2n, z, y;
-    __m128 one, exp_hi, exp_lo, ln2, log2EF, half, exp_C1, exp_C2;
-    __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
-    __m128i bias, exp, emm0, pi32_0x7f;
-
-    one = _mm_set1_ps(1.0);
-    exp_hi = _mm_set1_ps(88.3762626647949);
-    exp_lo = _mm_set1_ps(-88.3762626647949);
-    ln2 = _mm_set1_ps(0.6931471805);
-    log2EF = _mm_set1_ps(1.44269504088896341);
-    half = _mm_set1_ps(0.5);
-    exp_C1 = _mm_set1_ps(0.693359375);
-    exp_C2 = _mm_set1_ps(-2.12194440e-4);
-    pi32_0x7f = _mm_set1_epi32(0x7f);
-
-    exp_p0 = _mm_set1_ps(1.9875691500e-4);
-    exp_p1 = _mm_set1_ps(1.3981999507e-3);
-    exp_p2 = _mm_set1_ps(8.3334519073e-3);
-    exp_p3 = _mm_set1_ps(4.1665795894e-2);
-    exp_p4 = _mm_set1_ps(1.6666665459e-1);
-    exp_p5 = _mm_set1_ps(5.0000001201e-1);
-
-    for(;number < quarterPoints; number++){
-        // First compute the logarithm
-        aVal = _mm_load_ps(aPtr);
-        bias = _mm_set1_epi32(127);
-        leadingOne = _mm_set1_ps(1.0f);
-        exp = 
_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(_mm_castps_si128(aVal), 
_mm_set1_epi32(0x7f800000)), 23), bias);
-        logarithm = _mm_cvtepi32_ps(exp);
-
-        frac = _mm_or_ps(leadingOne, _mm_and_ps(aVal, 
_mm_castsi128_ps(_mm_set1_epi32(0x7fffff))));
-
-        #if LOG_POLY_DEGREE == 6
-          mantissa = POLY5( frac, 3.1157899f, -3.3241990f, 2.5988452f, 
-1.2315303f,  3.1821337e-1f, -3.4436006e-2f);
-        #elif LOG_POLY_DEGREE == 5
-          mantissa = POLY4( frac, 2.8882704548164776201f, 
-2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 
0.0596515482674574969533f);
-        #elif LOG_POLY_DEGREE == 4
-          mantissa = POLY3( frac, 2.61761038894603480148f, 
-1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
-        #elif LOG_POLY_DEGREE == 3
-          mantissa = POLY2( frac, 2.28330284476918490682f, 
-1.04913055217340124191f, 0.204446009836232697516f);
-        #else
-        #error
-        #endif
-
-        logarithm = _mm_add_ps(logarithm, _mm_mul_ps(mantissa, 
_mm_sub_ps(frac, leadingOne)));
-        logarithm = _mm_mul_ps(logarithm, ln2);
-
-
-        // Now calculate b*lna
-        bVal = _mm_load_ps(bPtr);
-        bVal = _mm_mul_ps(bVal, logarithm);
-
-        // Now compute exp(b*lna)
-        tmp = _mm_setzero_ps();
-
-        bVal = _mm_max_ps(_mm_min_ps(bVal, exp_hi), exp_lo);
-
-        fx = _mm_add_ps(_mm_mul_ps(bVal, log2EF), half);
-
-        emm0 = _mm_cvttps_epi32(fx);
-        tmp = _mm_cvtepi32_ps(emm0);
-
-        mask = _mm_and_ps(_mm_cmpgt_ps(tmp, fx), one);
-        fx = _mm_sub_ps(tmp, mask);
-
-        tmp = _mm_mul_ps(fx, exp_C1);
-        z = _mm_mul_ps(fx, exp_C2);
-        bVal = _mm_sub_ps(_mm_sub_ps(bVal, tmp), z);
-        z = _mm_mul_ps(bVal, bVal);
-
-        y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(exp_p0, bVal), exp_p1), bVal);
-        y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p2), bVal), exp_p3);
-        y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, bVal), exp_p4), bVal);
-        y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p5), z), bVal);
-        y = _mm_add_ps(y, one);
-
-        emm0 = _mm_slli_epi32(_mm_add_epi32(_mm_cvttps_epi32(fx), pi32_0x7f), 
23);
-
-        pow2n = _mm_castsi128_ps(emm0);
-        cVal = _mm_mul_ps(y, pow2n);
-
-        _mm_store_ps(cPtr, cVal);
-
-        aPtr += 4;
-        bPtr += 4;
-        cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *cPtr++ = pow(*aPtr++, *bPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-#endif /* INCLUDED_volk_32f_x2_pow_32f_a_H */
-
-#ifndef INCLUDED_volk_32f_x2_pow_32f_u_H
-#define INCLUDED_volk_32f_x2_pow_32f_u_H
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Computes pow(x,y) by using exp and log
-  \param cVector The vector where results will be stored
-  \param aVector The input vector of bases
-  \param bVector The input vector of indices
-  \param num_points Number of points for which pow is to be computed
-*/
-static inline void volk_32f_x2_pow_32f_generic(float* cVector, const float* 
bVector, const float* aVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* bPtr = bVector;
-    const float* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-        *cPtr++ = pow(*aPtr++, *bPtr++);
-    }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Computes pow(x,y) by using exp and log
-  \param cVector The vector where results will be stored
-  \param aVector The input vector of bases
-  \param bVector The input vector of indices
-  \param num_points Number of points for which pow is to be computed
-*/
-static inline void volk_32f_x2_pow_32f_u_sse4_1(float* cVector, const float* 
bVector, const float* aVector, unsigned int num_points){
-
-    float* cPtr = cVector;
-    const float* bPtr = bVector;
-    const float* aPtr = aVector;
-
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m128 aVal, bVal, cVal, logarithm, mantissa, frac, leadingOne;
-    __m128 tmp, fx, mask, pow2n, z, y;
-    __m128 one, exp_hi, exp_lo, ln2, log2EF, half, exp_C1, exp_C2;
-    __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
-    __m128i bias, exp, emm0, pi32_0x7f;
-
-    one = _mm_set1_ps(1.0);
-    exp_hi = _mm_set1_ps(88.3762626647949);
-    exp_lo = _mm_set1_ps(-88.3762626647949);
-    ln2 = _mm_set1_ps(0.6931471805);
-    log2EF = _mm_set1_ps(1.44269504088896341);
-    half = _mm_set1_ps(0.5);
-    exp_C1 = _mm_set1_ps(0.693359375);
-    exp_C2 = _mm_set1_ps(-2.12194440e-4);
-    pi32_0x7f = _mm_set1_epi32(0x7f);
-
-    exp_p0 = _mm_set1_ps(1.9875691500e-4);
-    exp_p1 = _mm_set1_ps(1.3981999507e-3);
-    exp_p2 = _mm_set1_ps(8.3334519073e-3);
-    exp_p3 = _mm_set1_ps(4.1665795894e-2);
-    exp_p4 = _mm_set1_ps(1.6666665459e-1);
-    exp_p5 = _mm_set1_ps(5.0000001201e-1);
-
-    for(;number < quarterPoints; number++){
-
-    // First compute the logarithm
-    aVal = _mm_loadu_ps(aPtr);
-    bias = _mm_set1_epi32(127);
-    leadingOne = _mm_set1_ps(1.0f);
-    exp = _mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(_mm_castps_si128(aVal), 
_mm_set1_epi32(0x7f800000)), 23), bias);
-    logarithm = _mm_cvtepi32_ps(exp);
-
-    frac = _mm_or_ps(leadingOne, _mm_and_ps(aVal, 
_mm_castsi128_ps(_mm_set1_epi32(0x7fffff))));
-
-    #if LOG_POLY_DEGREE == 6
-      mantissa = POLY5( frac, 3.1157899f, -3.3241990f, 2.5988452f, 
-1.2315303f,  3.1821337e-1f, -3.4436006e-2f);
-    #elif LOG_POLY_DEGREE == 5
-      mantissa = POLY4( frac, 2.8882704548164776201f, 
-2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 
0.0596515482674574969533f);
-    #elif LOG_POLY_DEGREE == 4
-      mantissa = POLY3( frac, 2.61761038894603480148f, 
-1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f);
-    #elif LOG_POLY_DEGREE == 3
-      mantissa = POLY2( frac, 2.28330284476918490682f, 
-1.04913055217340124191f, 0.204446009836232697516f);
-    #else
-    #error
-    #endif
-
-    logarithm = _mm_add_ps(logarithm, _mm_mul_ps(mantissa, _mm_sub_ps(frac, 
leadingOne)));
-    logarithm = _mm_mul_ps(logarithm, ln2);
-
-
-    // Now calculate b*lna
-    bVal = _mm_loadu_ps(bPtr);
-    bVal = _mm_mul_ps(bVal, logarithm);
-
-    // Now compute exp(b*lna)
-    tmp = _mm_setzero_ps();
-
-    bVal = _mm_max_ps(_mm_min_ps(bVal, exp_hi), exp_lo);
-
-    fx = _mm_add_ps(_mm_mul_ps(bVal, log2EF), half);
-
-    emm0 = _mm_cvttps_epi32(fx);
-    tmp = _mm_cvtepi32_ps(emm0);
-
-    mask = _mm_and_ps(_mm_cmpgt_ps(tmp, fx), one);
-    fx = _mm_sub_ps(tmp, mask);
-
-    tmp = _mm_mul_ps(fx, exp_C1);
-    z = _mm_mul_ps(fx, exp_C2);
-    bVal = _mm_sub_ps(_mm_sub_ps(bVal, tmp), z);
-    z = _mm_mul_ps(bVal, bVal);
-
-    y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(exp_p0, bVal), exp_p1), bVal);
-    y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p2), bVal), exp_p3);
-    y = _mm_mul_ps(_mm_add_ps(_mm_mul_ps(y, bVal), exp_p4), bVal);
-    y = _mm_add_ps(_mm_mul_ps(_mm_add_ps(y, exp_p5), z), bVal);
-    y = _mm_add_ps(y, one);
-
-    emm0 = _mm_slli_epi32(_mm_add_epi32(_mm_cvttps_epi32(fx), pi32_0x7f), 23);
-
-    pow2n = _mm_castsi128_ps(emm0);
-    cVal = _mm_mul_ps(y, pow2n);
-
-    _mm_storeu_ps(cPtr, cVal);
-
-    aPtr += 4;
-    bPtr += 4;
-    cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-        *cPtr++ = pow(*aPtr++, *bPtr++);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 for unaligned */
-
-#endif /* INCLUDED_volk_32f_x2_log2_32f_u_H */
diff --git a/volk/kernels/volk/volk_32f_x2_s32f_interleave_16ic.h 
b/volk/kernels/volk/volk_32f_x2_s32f_interleave_16ic.h
deleted file mode 100644
index 5d7eca8..0000000
--- a/volk/kernels/volk/volk_32f_x2_s32f_interleave_16ic.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
-#define INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Interleaves the I & Q vector data into the complex vector, scales 
the output values by the scalar, and converts to 16 bit data.
-    \param iBuffer The I buffer data to be interleaved
-    \param qBuffer The Q buffer data to be interleaved
-    \param complexVector The complex output vector
-    \param scalar The scaling value being multiplied against each data point
-    \param num_points The number of complex data values to be interleaved
-  */
-static inline void volk_32f_x2_s32f_interleave_16ic_a_sse2(lv_16sc_t* 
complexVector, const float* iBuffer, const float* qBuffer, const float scalar, 
unsigned int num_points){
-    unsigned int number = 0;
-    const float* iBufferPtr = iBuffer;
-    const float* qBufferPtr = qBuffer;
-
-    __m128 vScalar = _mm_set_ps1(scalar);
-
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m128 iValue, qValue, cplxValue1, cplxValue2;
-    __m128i intValue1, intValue2;
-
-    int16_t* complexVectorPtr = (int16_t*)complexVector;
-
-    for(;number < quarterPoints; number++){
-      iValue = _mm_load_ps(iBufferPtr);
-      qValue = _mm_load_ps(qBufferPtr);
-
-      // Interleaves the lower two values in the i and q variables into one 
buffer
-      cplxValue1 = _mm_unpacklo_ps(iValue, qValue);
-      cplxValue1 = _mm_mul_ps(cplxValue1, vScalar);
-
-      // Interleaves the upper two values in the i and q variables into one 
buffer
-      cplxValue2 = _mm_unpackhi_ps(iValue, qValue);
-      cplxValue2 = _mm_mul_ps(cplxValue2, vScalar);
-
-      intValue1 = _mm_cvtps_epi32(cplxValue1);
-      intValue2 = _mm_cvtps_epi32(cplxValue2);
-
-      intValue1 = _mm_packs_epi32(intValue1, intValue2);
-
-      _mm_store_si128((__m128i*)complexVectorPtr, intValue1);
-      complexVectorPtr += 8;
-
-      iBufferPtr += 4;
-      qBufferPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    complexVectorPtr = (int16_t*)(&complexVector[number]);
-    for(; number < num_points; number++){
-      *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
-      *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
-    }
-
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Interleaves the I & Q vector data into the complex vector, scales 
the output values by the scalar, and converts to 16 bit data.
-    \param iBuffer The I buffer data to be interleaved
-    \param qBuffer The Q buffer data to be interleaved
-    \param complexVector The complex output vector
-    \param scalar The scaling value being multiplied against each data point
-    \param num_points The number of complex data values to be interleaved
-  */
-static inline void volk_32f_x2_s32f_interleave_16ic_a_sse(lv_16sc_t* 
complexVector, const float* iBuffer, const float* qBuffer, const float scalar, 
unsigned int num_points){
-    unsigned int number = 0;
-    const float* iBufferPtr = iBuffer;
-    const float* qBufferPtr = qBuffer;
-
-    __m128 vScalar = _mm_set_ps1(scalar);
-
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m128 iValue, qValue, cplxValue;
-
-    int16_t* complexVectorPtr = (int16_t*)complexVector;
-
-    __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
-
-    for(;number < quarterPoints; number++){
-      iValue = _mm_load_ps(iBufferPtr);
-      qValue = _mm_load_ps(qBufferPtr);
-
-      // Interleaves the lower two values in the i and q variables into one 
buffer
-      cplxValue = _mm_unpacklo_ps(iValue, qValue);
-      cplxValue = _mm_mul_ps(cplxValue, vScalar);
-
-      _mm_store_ps(floatBuffer, cplxValue);
-
-      *complexVectorPtr++ = (int16_t)(floatBuffer[0]);
-      *complexVectorPtr++ = (int16_t)(floatBuffer[1]);
-      *complexVectorPtr++ = (int16_t)(floatBuffer[2]);
-      *complexVectorPtr++ = (int16_t)(floatBuffer[3]);
-
-      // Interleaves the upper two values in the i and q variables into one 
buffer
-      cplxValue = _mm_unpackhi_ps(iValue, qValue);
-      cplxValue = _mm_mul_ps(cplxValue, vScalar);
-
-      _mm_store_ps(floatBuffer, cplxValue);
-
-      *complexVectorPtr++ = (int16_t)(floatBuffer[0]);
-      *complexVectorPtr++ = (int16_t)(floatBuffer[1]);
-      *complexVectorPtr++ = (int16_t)(floatBuffer[2]);
-      *complexVectorPtr++ = (int16_t)(floatBuffer[3]);
-
-      iBufferPtr += 4;
-      qBufferPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    complexVectorPtr = (int16_t*)(&complexVector[number]);
-    for(; number < num_points; number++){
-      *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
-      *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
-    }
-
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Interleaves the I & Q vector data into the complex vector, scales 
the output values by the scalar, and converts to 16 bit data.
-    \param iBuffer The I buffer data to be interleaved
-    \param qBuffer The Q buffer data to be interleaved
-    \param complexVector The complex output vector
-    \param scalar The scaling value being multiplied against each data point
-    \param num_points The number of complex data values to be interleaved
-  */
-static inline void volk_32f_x2_s32f_interleave_16ic_generic(lv_16sc_t* 
complexVector, const float* iBuffer, const float* qBuffer, const float scalar, 
unsigned int num_points){
-  int16_t* complexVectorPtr = (int16_t*)complexVector;
-  const float* iBufferPtr = iBuffer;
-  const float* qBufferPtr = qBuffer;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *complexVectorPtr++ = (int16_t)(*iBufferPtr++ * scalar);
-    *complexVectorPtr++ = (int16_t)(*qBufferPtr++ * scalar);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32f_x2_s32f_interleave_16ic_a_H */
diff --git a/volk/kernels/volk/volk_32f_x2_subtract_32f.h 
b/volk/kernels/volk/volk_32f_x2_subtract_32f.h
deleted file mode 100644
index a3aa8e0..0000000
--- a/volk/kernels/volk/volk_32f_x2_subtract_32f.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x2_subtract_32f_a_H
-#define INCLUDED_volk_32f_x2_subtract_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Subtracts bVector form aVector and store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The initial vector
-  \param bVector The vector to be subtracted
-  \param num_points The number of values in aVector and bVector to be 
subtracted together and stored into cVector
-*/
-static inline void volk_32f_x2_subtract_32f_a_sse(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-      bVal = _mm_load_ps(bPtr);
-
-      cVal = _mm_sub_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) - (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Subtracts bVector form aVector and store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The initial vector
-  \param bVector The vector to be subtracted
-  \param num_points The number of values in aVector and bVector to be 
subtracted together and stored into cVector
-*/
-static inline void volk_32f_x2_subtract_32f_generic(float* cVector, const 
float* aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) - (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-/*!
-  \brief Subtracts bVector form aVector and store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The initial vector
-  \param bVector The vector to be subtracted
-  \param num_points The number of values in aVector and bVector to be 
subtracted together and stored into cVector
-*/
-static inline void volk_32f_x2_subtract_32f_neon(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-    const float* bPtr=  bVector;
-    unsigned int number = 0;
-    unsigned int quarter_points = num_points / 4;
-
-    float32x4_t a_vec, b_vec, c_vec;
-
-    for(number = 0; number < quarter_points; number++){
-        a_vec = vld1q_f32(aPtr);
-        b_vec = vld1q_f32(bPtr);
-        c_vec = vsubq_f32(a_vec, b_vec);
-        vst1q_f32(cPtr, c_vec);
-        aPtr += 4;
-        bPtr += 4;
-        cPtr += 4;
-    }
-
-    for(number = quarter_points * 4; number < num_points; number++){
-      *cPtr++ = (*aPtr++) - (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Subtracts bVector form aVector and store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The initial vector
-  \param bVector The vector to be subtracted
-  \param num_points The number of values in aVector and bVector to be 
subtracted together and stored into cVector
-*/
-extern void volk_32f_x2_subtract_32f_a_orc_impl(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32f_x2_subtract_32f_u_orc(float* cVector, const float* 
aVector, const float* bVector, unsigned int num_points){
-    volk_32f_x2_subtract_32f_a_orc_impl(cVector, aVector, bVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32f_x2_subtract_32f_a_H */
diff --git a/volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h 
b/volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h
deleted file mode 100644
index cc40c67..0000000
--- a/volk/kernels/volk/volk_32f_x3_sum_of_poly_32f.h
+++ /dev/null
@@ -1,452 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H
-#define INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H
-
-#include<inttypes.h>
-#include<stdio.h>
-#include<volk/volk_complex.h>
-
-#ifndef MAX
-#define MAX(X,Y) ((X) > (Y)?(X):(Y))
-#endif
-
-#ifdef LV_HAVE_SSE3
-#include<xmmintrin.h>
-#include<pmmintrin.h>
-
-static inline void volk_32f_x3_sum_of_poly_32f_a_sse3(float* target, float* 
src0, float* center_point_array, float* cutoff, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*4;
-
-  float result = 0.0;
-  float fst = 0.0;
-  float sq = 0.0;
-  float thrd = 0.0;
-  float frth = 0.0;
-  //float fith = 0.0;
-
-
-
-  __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10;// 
xmm11, xmm12;
-
-  xmm9 = _mm_setzero_ps();
-  xmm1 = _mm_setzero_ps();
-
-  xmm0 = _mm_load1_ps(&center_point_array[0]);
-  xmm6 = _mm_load1_ps(&center_point_array[1]);
-  xmm7 = _mm_load1_ps(&center_point_array[2]);
-  xmm8 = _mm_load1_ps(&center_point_array[3]);
-  //xmm11 = _mm_load1_ps(&center_point_array[4]);
-  xmm10 = _mm_load1_ps(cutoff);
-
-  int bound = num_bytes >> 4;
-  int leftovers = (num_bytes >> 2) & 3;
-  int i = 0;
-
-  for(; i < bound; ++i) {
-    xmm2 = _mm_load_ps(src0);
-    xmm2 = _mm_max_ps(xmm10, xmm2);
-    xmm3 = _mm_mul_ps(xmm2, xmm2);
-    xmm4 = _mm_mul_ps(xmm2, xmm3);
-    xmm5 = _mm_mul_ps(xmm3, xmm3);
-    //xmm12 = _mm_mul_ps(xmm3, xmm4);
-
-    xmm2 = _mm_mul_ps(xmm2, xmm0);
-    xmm3 = _mm_mul_ps(xmm3, xmm6);
-    xmm4 = _mm_mul_ps(xmm4, xmm7);
-    xmm5 = _mm_mul_ps(xmm5, xmm8);
-    //xmm12 = _mm_mul_ps(xmm12, xmm11);
-
-    xmm2 = _mm_add_ps(xmm2, xmm3);
-    xmm3 = _mm_add_ps(xmm4, xmm5);
-
-    src0 += 4;
-
-    xmm9 = _mm_add_ps(xmm2, xmm9);
-
-    xmm1 = _mm_add_ps(xmm3, xmm1);
-
-    //xmm9 = _mm_add_ps(xmm12, xmm9);
-  }
-
-  xmm2 = _mm_hadd_ps(xmm9, xmm1);
-  xmm3 = _mm_hadd_ps(xmm2, xmm2);
-  xmm4 = _mm_hadd_ps(xmm3, xmm3);
-
-  _mm_store_ss(&result, xmm4);
-
-
-
-  for(i = 0; i < leftovers; ++i) {
-    fst = src0[i];
-    fst = MAX(fst, *cutoff);
-    sq = fst * fst;
-    thrd = fst * sq;
-    frth = sq * sq;
-    //fith = sq * thrd;
-
-    result += (center_point_array[0] * fst +
-              center_point_array[1] * sq +
-              center_point_array[2] * thrd +
-              center_point_array[3] * frth);// +
-              //center_point_array[4] * fith);
-  }
-
-  result += ((float)((bound * 4) + leftovers)) * center_point_array[4]; 
//center_point_array[5];
-
-  target[0] = result;
-}
-
-
-#endif /*LV_HAVE_SSE3*/
-
-
-#ifdef LV_HAVE_AVX
-#include<immintrin.h>
-
-static inline void volk_32f_x3_sum_of_poly_32f_a_avx(float* target, float* 
src0, float* center_point_array, float* cutoff, unsigned int num_points)
-{
-  const unsigned int eighth_points = num_points / 8;
-  float fst = 0.0;
-  float sq = 0.0;
-  float thrd = 0.0;
-  float frth = 0.0;
-
-  __m256 cpa0, cpa1, cpa2, cpa3, cutoff_vec;
-  __m256 target_vec;
-  __m256 x_to_1, x_to_2, x_to_3, x_to_4;
-
-  cpa0 = _mm256_set1_ps(center_point_array[0]);
-  cpa1 = _mm256_set1_ps(center_point_array[1]);
-  cpa2 = _mm256_set1_ps(center_point_array[2]);
-  cpa3 = _mm256_set1_ps(center_point_array[3]);
-  cutoff_vec = _mm256_set1_ps(*cutoff);
-  target_vec = _mm256_setzero_ps();
-
-  unsigned int i;
-
-  for(i = 0; i < eighth_points; ++i) {
-    x_to_1 = _mm256_load_ps(src0);
-    x_to_1 = _mm256_max_ps(x_to_1, cutoff_vec);
-    x_to_2 = _mm256_mul_ps(x_to_1, x_to_1); // x^2
-    x_to_3 = _mm256_mul_ps(x_to_1, x_to_2); // x^3
-    // x^1 * x^3 is slightly faster than x^2 * x^2
-    x_to_4 = _mm256_mul_ps(x_to_1, x_to_3); // x^4
-
-    x_to_1 = _mm256_mul_ps(x_to_1, cpa0); // cpa[0] * x^1
-    x_to_2 = _mm256_mul_ps(x_to_2, cpa1); // cpa[1] * x^2
-    x_to_3 = _mm256_mul_ps(x_to_3, cpa2); // cpa[2] * x^3
-    x_to_4 = _mm256_mul_ps(x_to_4, cpa3); // cpa[3] * x^4
-
-    x_to_1 = _mm256_add_ps(x_to_1, x_to_2);
-    x_to_3 = _mm256_add_ps(x_to_3, x_to_4);
-    // this is slightly faster than result += (x_to_1 + x_to_3)
-    target_vec = _mm256_add_ps(x_to_1, target_vec);
-    target_vec = _mm256_add_ps(x_to_3, target_vec);
-
-    src0 += 8;
-  }
-
-  // the hadd for vector reduction has very very slight impact @ 50k iters
-  __VOLK_ATTR_ALIGNED(32) float temp_results[8];
-  target_vec = _mm256_hadd_ps(target_vec, target_vec); // x0+x1 | x2+x3 | 
x0+x1 | x2+x3 || x4+x5 | x6+x7 | x4+x5 | x6+x7
-  _mm256_store_ps(temp_results, target_vec);
-  *target = temp_results[0] + temp_results[1] + temp_results[4] + 
temp_results[5];
-
-
-  for(i = eighth_points*8; i < num_points; ++i) {
-    fst = *(src0++);
-    fst = MAX(fst, *cutoff);
-    sq = fst * fst;
-    thrd = fst * sq;
-    frth = sq * sq;
-
-    *target += (center_point_array[0] * fst +
-              center_point_array[1] * sq +
-              center_point_array[2] * thrd +
-              center_point_array[3] * frth);
-  }
-
-  *target += ((float)(num_points)) * center_point_array[4];
-
-}
-#endif // LV_HAVE_AVX
-
-
-#ifdef LV_HAVE_GENERIC
-
-static inline void volk_32f_x3_sum_of_poly_32f_generic(float* target, float* 
src0, float* center_point_array, float* cutoff, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*4;
-
-  float result = 0.0;
-  float fst = 0.0;
-  float sq = 0.0;
-  float thrd = 0.0;
-  float frth = 0.0;
-  //float fith = 0.0;
-
-
-
-  unsigned int i = 0;
-
-  for(; i < num_bytes >> 2; ++i) {
-    fst = src0[i];
-    fst = MAX(fst, *cutoff);
-
-    sq = fst * fst;
-    thrd = fst * sq;
-    frth = sq * sq;
-    //fith = sq * thrd;
-
-    result += (center_point_array[0] * fst +
-              center_point_array[1] * sq +
-              center_point_array[2] * thrd +
-              center_point_array[3] * frth); //+
-              //center_point_array[4] * fith);
-    /*printf("%f12...%d\n", (center_point_array[0] * fst +
-                 center_point_array[1] * sq +
-                 center_point_array[2] * thrd +
-                        center_point_array[3] * frth) +
-          //center_point_array[4] * fith) +
-          (center_point_array[4]), i);
-    */
-  }
-
-  result += ((float)(num_bytes >> 2)) * 
(center_point_array[4]);//(center_point_array[5]);
-
-
-
-  *target = result;
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#ifdef LV_HAVE_AVX
-#include<immintrin.h>
-
-static inline void volk_32f_x3_sum_of_poly_32f_u_avx(float* target, float* 
src0, float* center_point_array, float* cutoff, unsigned int num_points)
-{
-  const unsigned int eighth_points = num_points / 8;
-  float fst = 0.0;
-  float sq = 0.0;
-  float thrd = 0.0;
-  float frth = 0.0;
-
-  __m256 cpa0, cpa1, cpa2, cpa3, cutoff_vec;
-  __m256 target_vec;
-  __m256 x_to_1, x_to_2, x_to_3, x_to_4;
-
-  cpa0 = _mm256_set1_ps(center_point_array[0]);
-  cpa1 = _mm256_set1_ps(center_point_array[1]);
-  cpa2 = _mm256_set1_ps(center_point_array[2]);
-  cpa3 = _mm256_set1_ps(center_point_array[3]);
-  cutoff_vec = _mm256_set1_ps(*cutoff);
-  target_vec = _mm256_setzero_ps();
-
-  unsigned int i;
-
-  for(i = 0; i < eighth_points; ++i) {
-    x_to_1 = _mm256_loadu_ps(src0);
-    x_to_1 = _mm256_max_ps(x_to_1, cutoff_vec);
-    x_to_2 = _mm256_mul_ps(x_to_1, x_to_1); // x^2
-    x_to_3 = _mm256_mul_ps(x_to_1, x_to_2); // x^3
-    // x^1 * x^3 is slightly faster than x^2 * x^2
-    x_to_4 = _mm256_mul_ps(x_to_1, x_to_3); // x^4
-
-    x_to_1 = _mm256_mul_ps(x_to_1, cpa0); // cpa[0] * x^1
-    x_to_2 = _mm256_mul_ps(x_to_2, cpa1); // cpa[1] * x^2
-    x_to_3 = _mm256_mul_ps(x_to_3, cpa2); // cpa[2] * x^3
-    x_to_4 = _mm256_mul_ps(x_to_4, cpa3); // cpa[3] * x^4
-
-    x_to_1 = _mm256_add_ps(x_to_1, x_to_2);
-    x_to_3 = _mm256_add_ps(x_to_3, x_to_4);
-    // this is slightly faster than result += (x_to_1 + x_to_3)
-    target_vec = _mm256_add_ps(x_to_1, target_vec);
-    target_vec = _mm256_add_ps(x_to_3, target_vec);
-
-    src0 += 8;
-  }
-
-  // the hadd for vector reduction has very very slight impact @ 50k iters
-  __VOLK_ATTR_ALIGNED(32) float temp_results[8];
-  target_vec = _mm256_hadd_ps(target_vec, target_vec); // x0+x1 | x2+x3 | 
x0+x1 | x2+x3 || x4+x5 | x6+x7 | x4+x5 | x6+x7
-  _mm256_store_ps(temp_results, target_vec);
-  *target = temp_results[0] + temp_results[1] + temp_results[4] + 
temp_results[5];
-
-
-  for(i = eighth_points*8; i < num_points; ++i) {
-    fst = *(src0++);
-    fst = MAX(fst, *cutoff);
-    sq = fst * fst;
-    thrd = fst * sq;
-    frth = sq * sq;
-
-    *target += (center_point_array[0] * fst +
-              center_point_array[1] * sq +
-              center_point_array[2] * thrd +
-              center_point_array[3] * frth);
-  }
-
-  *target += ((float)(num_points)) * center_point_array[4];
-
-}
-#endif // LV_HAVE_AVX
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-static inline void volk_32f_x3_sum_of_poly_32f_a_neon(float* __restrict 
target, float* __restrict src0, float* __restrict center_point_array, float* 
__restrict cutoff, unsigned int num_points) {
-
-
-  unsigned int i;
-  float zero[4] = {0.0f, 0.0f, 0.0f, 0.0f };
-
-  float32x2_t x_to_1, x_to_2, x_to_3, x_to_4;
-  float32x2_t cutoff_vector;
-  float32x2x2_t x_low, x_high;
-  float32x4_t x_qvector, c_qvector, cpa_qvector;
-  float accumulator;
-  float res_accumulators[4];
-
-  c_qvector = vld1q_f32( zero );
-  // load the cutoff in to a vector
-  cutoff_vector = vdup_n_f32( *cutoff );
-  // ... center point array
-  cpa_qvector = vld1q_f32( center_point_array );
-
-  for(i=0; i < num_points; ++i) {
-    // load x  (src0)
-    x_to_1 = vdup_n_f32( *src0++ );
-
-    // Get a vector of max(src0, cutoff)
-    x_to_1 = vmax_f32(x_to_1,  cutoff_vector ); // x^1
-    x_to_2 = vmul_f32(x_to_1, x_to_1); // x^2
-    x_to_3 = vmul_f32(x_to_2, x_to_1); // x^3
-    x_to_4 = vmul_f32(x_to_3, x_to_1); // x^4
-    // zip up doubles to interleave
-    x_low = vzip_f32(x_to_1, x_to_2); // [x^2 | x^1 || x^2 | x^1]
-    x_high = vzip_f32(x_to_3, x_to_4); // [x^4 | x^3 || x^4 | x^3]
-    // float32x4_t vcombine_f32(float32x2_t low, float32x2_t high); // VMOV 
d0,d0
-    x_qvector = vcombine_f32(x_low.val[0], x_high.val[0]);
-    // now we finally have [x^4 | x^3 | x^2 | x] !
-
-    c_qvector = vmlaq_f32(c_qvector, x_qvector, cpa_qvector);
-
-  }
-  // there should be better vector reduction techniques
-  vst1q_f32(res_accumulators, c_qvector );
-  accumulator = res_accumulators[0] + res_accumulators[1] +
-          res_accumulators[2] + res_accumulators[3];
-
-  *target = accumulator + center_point_array[4] * (float)num_points;
-}
-
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_NEON
-
-static inline void volk_32f_x3_sum_of_poly_32f_neonvert(float* __restrict 
target, float* __restrict src0, float* __restrict center_point_array, float* 
__restrict cutoff, unsigned int num_points) {
-
-
-  unsigned int i;
-  float zero[4] = {0.0f, 0.0f, 0.0f, 0.0f };
-
-  float accumulator;
-
-
-  float32x4_t accumulator1_vec, accumulator2_vec, accumulator3_vec, 
accumulator4_vec;
-  accumulator1_vec = vld1q_f32(zero);
-  accumulator2_vec = vld1q_f32(zero);
-  accumulator3_vec = vld1q_f32(zero);
-  accumulator4_vec = vld1q_f32(zero);
-  float32x4_t x_to_1, x_to_2, x_to_3, x_to_4;
-  float32x4_t cutoff_vector, cpa_0, cpa_1, cpa_2, cpa_3;
-
-  // load the cutoff in to a vector
-  cutoff_vector = vdupq_n_f32( *cutoff );
-  // ... center point array
-  cpa_0 = vdupq_n_f32(center_point_array[0]);
-  cpa_1 = vdupq_n_f32(center_point_array[1]);
-  cpa_2 = vdupq_n_f32(center_point_array[2]);
-  cpa_3 = vdupq_n_f32(center_point_array[3]);
-
-
-  // nathan is not sure why this is slower *and* wrong compared to neonvertfma
-  for(i=0; i < num_points/4; ++i) {
-    // load x
-    x_to_1 = vld1q_f32( src0 );
-
-    // Get a vector of max(src0, cutoff)
-    x_to_1 = vmaxq_f32(x_to_1,  cutoff_vector ); // x^1
-    x_to_2 = vmulq_f32(x_to_1, x_to_1); // x^2
-    x_to_3 = vmulq_f32(x_to_2, x_to_1); // x^3
-    x_to_4 = vmulq_f32(x_to_3, x_to_1); // x^4
-    x_to_1 = vmulq_f32(x_to_1, cpa_0);
-    x_to_2 = vmulq_f32(x_to_2, cpa_1);
-    x_to_3 = vmulq_f32(x_to_3, cpa_2);
-    x_to_4 = vmulq_f32(x_to_4, cpa_3);
-    accumulator1_vec = vaddq_f32(accumulator1_vec, x_to_1);
-    accumulator2_vec = vaddq_f32(accumulator2_vec, x_to_2);
-    accumulator3_vec = vaddq_f32(accumulator3_vec, x_to_3);
-    accumulator4_vec = vaddq_f32(accumulator4_vec, x_to_4);
-
-    src0 += 4;
-  }
-  accumulator1_vec = vaddq_f32(accumulator1_vec, accumulator2_vec);
-  accumulator3_vec = vaddq_f32(accumulator3_vec, accumulator4_vec);
-  accumulator1_vec = vaddq_f32(accumulator1_vec, accumulator3_vec);
-
-  __VOLK_ATTR_ALIGNED(32) float res_accumulators[4];
-  vst1q_f32(res_accumulators, accumulator1_vec );
-  accumulator = res_accumulators[0] + res_accumulators[1] +
-          res_accumulators[2] + res_accumulators[3];
-
-  float fst = 0.0;
-  float sq = 0.0;
-  float thrd = 0.0;
-  float frth = 0.0;
-
-  for(i = 4*num_points/4; i < num_points; ++i) {
-    fst = src0[i];
-    fst = MAX(fst, *cutoff);
-
-    sq = fst * fst;
-    thrd = fst * sq;
-    frth = sq * sq;
-    //fith = sq * thrd;
-
-    accumulator += (center_point_array[0] * fst +
-           center_point_array[1] * sq +
-           center_point_array[2] * thrd +
-           center_point_array[3] * frth); //+
-  }
-
-  *target = accumulator + center_point_array[4] * (float)num_points;
-}
-
-#endif /* LV_HAVE_NEON */
-
-#endif /*INCLUDED_volk_32f_x3_sum_of_poly_32f_a_H*/
diff --git a/volk/kernels/volk/volk_32fc_32f_dot_prod_32fc.h 
b/volk/kernels/volk/volk_32fc_32f_dot_prod_32fc.h
deleted file mode 100644
index 0c3ef0d..0000000
--- a/volk/kernels/volk/volk_32fc_32f_dot_prod_32fc.h
+++ /dev/null
@@ -1,539 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_32f_dot_prod_32fc_a_H
-#define INCLUDED_volk_32fc_32f_dot_prod_32fc_a_H
-
-#include <volk/volk_common.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_32fc_32f_dot_prod_32fc_generic(lv_32fc_t* result, 
const lv_32fc_t* input, const float * taps, unsigned int num_points) {
-
-  float res[2];
-  float *realpt = &res[0], *imagpt = &res[1];
-  const float* aPtr = (float*)input;
-  const float* bPtr=  taps;
-  unsigned int number = 0;
-
-  *realpt = 0;
-  *imagpt = 0;
-
-  for(number = 0; number < num_points; number++){
-    *realpt += ((*aPtr++) * (*bPtr));
-    *imagpt += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#ifdef LV_HAVE_AVX
-
-#include <immintrin.h>
-
-static inline void volk_32fc_32f_dot_prod_32fc_a_avx( lv_32fc_t* result, const 
lv_32fc_t* input, const float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float res[2];
-  float *realpt = &res[0], *imagpt = &res[1];
-  const float* aPtr = (float*)input;
-  const float* bPtr = taps;
-
-  __m256 a0Val, a1Val, a2Val, a3Val;
-  __m256 b0Val, b1Val, b2Val, b3Val;
-  __m256 x0Val, x1Val, x0loVal, x0hiVal, x1loVal, x1hiVal;
-  __m256 c0Val, c1Val, c2Val, c3Val;
-
-  __m256 dotProdVal0 = _mm256_setzero_ps();
-  __m256 dotProdVal1 = _mm256_setzero_ps();
-  __m256 dotProdVal2 = _mm256_setzero_ps();
-  __m256 dotProdVal3 = _mm256_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm256_load_ps(aPtr);
-    a1Val = _mm256_load_ps(aPtr+8);
-    a2Val = _mm256_load_ps(aPtr+16);
-    a3Val = _mm256_load_ps(aPtr+24);
-
-    x0Val = _mm256_load_ps(bPtr); // t0|t1|t2|t3|t4|t5|t6|t7
-    x1Val = _mm256_load_ps(bPtr+8);
-    x0loVal = _mm256_unpacklo_ps(x0Val, x0Val); // t0|t0|t1|t1|t4|t4|t5|t5
-    x0hiVal = _mm256_unpackhi_ps(x0Val, x0Val); // t2|t2|t3|t3|t6|t6|t7|t7
-    x1loVal = _mm256_unpacklo_ps(x1Val, x1Val);
-    x1hiVal = _mm256_unpackhi_ps(x1Val, x1Val);
-
-    // TODO: it may be possible to rearrange swizzling to better pipeline data
-    b0Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x20); // 
t0|t0|t1|t1|t2|t2|t3|t3
-    b1Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x31); // 
t4|t4|t5|t5|t6|t6|t7|t7
-    b2Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x20);
-    b3Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x31);
-
-    c0Val = _mm256_mul_ps(a0Val, b0Val);
-    c1Val = _mm256_mul_ps(a1Val, b1Val);
-    c2Val = _mm256_mul_ps(a2Val, b2Val);
-    c3Val = _mm256_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm256_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm256_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm256_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm256_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 32;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(32) float dotProductVector[8];
-
-  _mm256_store_ps(dotProductVector,dotProdVal0); // Store the results back 
into the dot product vector
-
-  *realpt = dotProductVector[0];
-  *imagpt = dotProductVector[1];
-  *realpt += dotProductVector[2];
-  *imagpt += dotProductVector[3];
-  *realpt += dotProductVector[4];
-  *imagpt += dotProductVector[5];
-  *realpt += dotProductVector[6];
-  *imagpt += dotProductVector[7];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    *realpt += ((*aPtr++) * (*bPtr));
-    *imagpt += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-
-#endif /*LV_HAVE_AVX*/
-
-
-
-
-#ifdef LV_HAVE_SSE
-
-
-static inline void volk_32fc_32f_dot_prod_32fc_a_sse( lv_32fc_t* result, const 
 lv_32fc_t* input, const  float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 8;
-
-  float res[2];
-  float *realpt = &res[0], *imagpt = &res[1];
-  const float* aPtr = (float*)input;
-  const float* bPtr = taps;
-
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 x0Val, x1Val, x2Val, x3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm_load_ps(aPtr);
-    a1Val = _mm_load_ps(aPtr+4);
-    a2Val = _mm_load_ps(aPtr+8);
-    a3Val = _mm_load_ps(aPtr+12);
-
-    x0Val = _mm_load_ps(bPtr);
-    x1Val = _mm_load_ps(bPtr);
-    x2Val = _mm_load_ps(bPtr+4);
-    x3Val = _mm_load_ps(bPtr+4);
-    b0Val = _mm_unpacklo_ps(x0Val, x1Val);
-    b1Val = _mm_unpackhi_ps(x0Val, x1Val);
-    b2Val = _mm_unpacklo_ps(x2Val, x3Val);
-    b3Val = _mm_unpackhi_ps(x2Val, x3Val);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 16;
-    bPtr += 8;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  *realpt = dotProductVector[0];
-  *imagpt = dotProductVector[1];
-  *realpt += dotProductVector[2];
-  *imagpt += dotProductVector[3];
-
-  number = sixteenthPoints*8;
-  for(;number < num_points; number++){
-    *realpt += ((*aPtr++) * (*bPtr));
-    *imagpt += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-
-#endif /*LV_HAVE_SSE*/
-
-
-
-#ifdef LV_HAVE_AVX
-
-#include <immintrin.h>
-
-static inline void volk_32fc_32f_dot_prod_32fc_u_avx( lv_32fc_t* result, const 
lv_32fc_t* input, const float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 16;
-
-  float res[2];
-  float *realpt = &res[0], *imagpt = &res[1];
-  const float* aPtr = (float*)input;
-  const float* bPtr = taps;
-
-  __m256 a0Val, a1Val, a2Val, a3Val;
-  __m256 b0Val, b1Val, b2Val, b3Val;
-  __m256 x0Val, x1Val, x0loVal, x0hiVal, x1loVal, x1hiVal;
-  __m256 c0Val, c1Val, c2Val, c3Val;
-
-  __m256 dotProdVal0 = _mm256_setzero_ps();
-  __m256 dotProdVal1 = _mm256_setzero_ps();
-  __m256 dotProdVal2 = _mm256_setzero_ps();
-  __m256 dotProdVal3 = _mm256_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm256_loadu_ps(aPtr);
-    a1Val = _mm256_loadu_ps(aPtr+8);
-    a2Val = _mm256_loadu_ps(aPtr+16);
-    a3Val = _mm256_loadu_ps(aPtr+24);
-
-    x0Val = _mm256_loadu_ps(bPtr); // t0|t1|t2|t3|t4|t5|t6|t7
-    x1Val = _mm256_loadu_ps(bPtr+8);
-    x0loVal = _mm256_unpacklo_ps(x0Val, x0Val); // t0|t0|t1|t1|t4|t4|t5|t5
-    x0hiVal = _mm256_unpackhi_ps(x0Val, x0Val); // t2|t2|t3|t3|t6|t6|t7|t7
-    x1loVal = _mm256_unpacklo_ps(x1Val, x1Val);
-    x1hiVal = _mm256_unpackhi_ps(x1Val, x1Val);
-
-    // TODO: it may be possible to rearrange swizzling to better pipeline data
-    b0Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x20); // 
t0|t0|t1|t1|t2|t2|t3|t3
-    b1Val = _mm256_permute2f128_ps(x0loVal, x0hiVal, 0x31); // 
t4|t4|t5|t5|t6|t6|t7|t7
-    b2Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x20);
-    b3Val = _mm256_permute2f128_ps(x1loVal, x1hiVal, 0x31);
-
-    c0Val = _mm256_mul_ps(a0Val, b0Val);
-    c1Val = _mm256_mul_ps(a1Val, b1Val);
-    c2Val = _mm256_mul_ps(a2Val, b2Val);
-    c3Val = _mm256_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm256_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm256_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm256_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm256_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 32;
-    bPtr += 16;
-  }
-
-  dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm256_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(32) float dotProductVector[8];
-
-  _mm256_store_ps(dotProductVector,dotProdVal0); // Store the results back 
into the dot product vector
-
-  *realpt = dotProductVector[0];
-  *imagpt = dotProductVector[1];
-  *realpt += dotProductVector[2];
-  *imagpt += dotProductVector[3];
-  *realpt += dotProductVector[4];
-  *imagpt += dotProductVector[5];
-  *realpt += dotProductVector[6];
-  *imagpt += dotProductVector[7];
-
-  number = sixteenthPoints*16;
-  for(;number < num_points; number++){
-    *realpt += ((*aPtr++) * (*bPtr));
-    *imagpt += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-#endif /*LV_HAVE_AVX*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-static inline void volk_32fc_32f_dot_prod_32fc_neon_unroll ( lv_32fc_t* 
__restrict result, const  lv_32fc_t* __restrict input, const  float* __restrict 
taps, unsigned int num_points) {
-
-   unsigned int number;
-   const unsigned int quarterPoints = num_points / 8;
-
-   float res[2];
-   float *realpt = &res[0], *imagpt = &res[1];
-   const float* inputPtr = (float*)input;
-   const float* tapsPtr = taps;
-   float zero[4] = {0.0f, 0.0f, 0.0f, 0.0f };
-   float accVector_real[4];
-   float accVector_imag[4];
-
-   float32x4x2_t  inputVector0, inputVector1;
-   float32x4_t  tapsVector0, tapsVector1;
-   float32x4_t  tmp_real0, tmp_imag0;
-   float32x4_t  tmp_real1, tmp_imag1;
-   float32x4_t real_accumulator0, imag_accumulator0;
-   float32x4_t real_accumulator1, imag_accumulator1;
-
-   // zero out accumulators
-   // take a *float, return float32x4_t
-   real_accumulator0 = vld1q_f32( zero );
-   imag_accumulator0 = vld1q_f32( zero );
-   real_accumulator1 = vld1q_f32( zero );
-   imag_accumulator1 = vld1q_f32( zero );
-
-   for(number=0 ;number < quarterPoints; number++){
-      // load doublewords and duplicate in to second lane
-      tapsVector0 = vld1q_f32(tapsPtr );
-      tapsVector1 = vld1q_f32(tapsPtr+4 );
-
-      // load quadword of complex numbers in to 2 lanes. 1st lane is real, 2dn 
imag
-      inputVector0 = vld2q_f32(inputPtr );
-      inputVector1 = vld2q_f32(inputPtr+8 );
-      // inputVector is now a struct of two vectors, 0th is real, 1st is imag
-
-      tmp_real0 = vmulq_f32(tapsVector0, inputVector0.val[0]);
-      tmp_imag0 = vmulq_f32(tapsVector0, inputVector0.val[1]);
-
-      tmp_real1 = vmulq_f32(tapsVector1, inputVector1.val[0]);
-      tmp_imag1 = vmulq_f32(tapsVector1, inputVector1.val[1]);
-
-      real_accumulator0 = vaddq_f32(real_accumulator0, tmp_real0);
-      imag_accumulator0 = vaddq_f32(imag_accumulator0, tmp_imag0);
-
-      real_accumulator1 = vaddq_f32(real_accumulator1, tmp_real1);
-      imag_accumulator1 = vaddq_f32(imag_accumulator1, tmp_imag1);
-
-      tapsPtr += 8;
-      inputPtr += 16;
-   }
-
-   real_accumulator0 = vaddq_f32( real_accumulator0, real_accumulator1);
-   imag_accumulator0 = vaddq_f32( imag_accumulator0, imag_accumulator1);
-   // void vst1q_f32( float32_t * ptr, float32x4_t val);
-   // store results back to a complex (array of 2 floats)
-   vst1q_f32(accVector_real, real_accumulator0);
-   vst1q_f32(accVector_imag, imag_accumulator0);
-   *realpt = accVector_real[0] + accVector_real[1] +
-             accVector_real[2] + accVector_real[3] ;
-
-   *imagpt = accVector_imag[0] + accVector_imag[1] +
-             accVector_imag[2] + accVector_imag[3] ;
-
-  // clean up the remainder
-  for(number=quarterPoints*8; number < num_points; number++){
-    *realpt += ((*inputPtr++) * (*tapsPtr));
-    *imagpt += ((*inputPtr++) * (*tapsPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-
-#endif /*LV_HAVE_NEON*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-static inline void volk_32fc_32f_dot_prod_32fc_a_neon ( lv_32fc_t* __restrict 
result, const  lv_32fc_t* __restrict input, const  float* __restrict taps, 
unsigned int num_points) {
-
-   unsigned int number;
-   const unsigned int quarterPoints = num_points / 4;
-
-   float res[2];
-   float *realpt = &res[0], *imagpt = &res[1];
-   const float* inputPtr = (float*)input;
-   const float* tapsPtr = taps;
-   float zero[4] = {0.0f, 0.0f, 0.0f, 0.0f };
-   float accVector_real[4];
-   float accVector_imag[4];
-
-   float32x4x2_t  inputVector;
-   float32x4_t  tapsVector;
-   float32x4_t tmp_real, tmp_imag;
-   float32x4_t real_accumulator, imag_accumulator;
-
-
-   // zero out accumulators
-   // take a *float, return float32x4_t
-   real_accumulator = vld1q_f32( zero );
-   imag_accumulator = vld1q_f32( zero );
-
-   for(number=0 ;number < quarterPoints; number++){
-      // load taps ( float32x2x2_t = vld1q_f32( float32_t const * ptr) )
-      // load doublewords and duplicate in to second lane
-      tapsVector = vld1q_f32(tapsPtr );
-
-      // load quadword of complex numbers in to 2 lanes. 1st lane is real, 2dn 
imag
-      inputVector = vld2q_f32(inputPtr );
-
-      tmp_real = vmulq_f32(tapsVector, inputVector.val[0]);
-      tmp_imag = vmulq_f32(tapsVector, inputVector.val[1]);
-
-      real_accumulator = vaddq_f32(real_accumulator, tmp_real);
-      imag_accumulator = vaddq_f32(imag_accumulator, tmp_imag);
-
-
-      tapsPtr += 4;
-      inputPtr += 8;
-
-   }
-
-   // store results back to a complex (array of 2 floats)
-   vst1q_f32(accVector_real, real_accumulator);
-   vst1q_f32(accVector_imag, imag_accumulator);
-   *realpt = accVector_real[0] + accVector_real[1] +
-             accVector_real[2] + accVector_real[3] ;
-
-   *imagpt = accVector_imag[0] + accVector_imag[1] +
-             accVector_imag[2] + accVector_imag[3] ;
-
-  // clean up the remainder
-  for(number=quarterPoints*4; number < num_points; number++){
-    *realpt += ((*inputPtr++) * (*tapsPtr));
-    *imagpt += ((*inputPtr++) * (*tapsPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-
-#endif /*LV_HAVE_NEON*/
-
-#ifdef LV_HAVE_NEON
-extern void volk_32fc_32f_dot_prod_32fc_a_neonasm ( lv_32fc_t* result, const  
lv_32fc_t* input, const  float* taps, unsigned int num_points);
-#endif /*LV_HAVE_NEON*/
-
-#ifdef LV_HAVE_NEON
-extern void volk_32fc_32f_dot_prod_32fc_a_neonpipeline ( lv_32fc_t* result, 
const  lv_32fc_t* input, const  float* taps, unsigned int num_points);
-#endif /*LV_HAVE_NEON*/
-
-#ifdef LV_HAVE_SSE
-
-static inline void volk_32fc_32f_dot_prod_32fc_u_sse( lv_32fc_t* result, const 
 lv_32fc_t* input, const  float* taps, unsigned int num_points) {
-
-  unsigned int number = 0;
-  const unsigned int sixteenthPoints = num_points / 8;
-
-  float res[2];
-  float *realpt = &res[0], *imagpt = &res[1];
-  const float* aPtr = (float*)input;
-  const float* bPtr = taps;
-
-  __m128 a0Val, a1Val, a2Val, a3Val;
-  __m128 b0Val, b1Val, b2Val, b3Val;
-  __m128 x0Val, x1Val, x2Val, x3Val;
-  __m128 c0Val, c1Val, c2Val, c3Val;
-
-  __m128 dotProdVal0 = _mm_setzero_ps();
-  __m128 dotProdVal1 = _mm_setzero_ps();
-  __m128 dotProdVal2 = _mm_setzero_ps();
-  __m128 dotProdVal3 = _mm_setzero_ps();
-
-  for(;number < sixteenthPoints; number++){
-
-    a0Val = _mm_loadu_ps(aPtr);
-    a1Val = _mm_loadu_ps(aPtr+4);
-    a2Val = _mm_loadu_ps(aPtr+8);
-    a3Val = _mm_loadu_ps(aPtr+12);
-
-    x0Val = _mm_loadu_ps(bPtr);
-    x1Val = _mm_loadu_ps(bPtr);
-    x2Val = _mm_loadu_ps(bPtr+4);
-    x3Val = _mm_loadu_ps(bPtr+4);
-    b0Val = _mm_unpacklo_ps(x0Val, x1Val);
-    b1Val = _mm_unpackhi_ps(x0Val, x1Val);
-    b2Val = _mm_unpacklo_ps(x2Val, x3Val);
-    b3Val = _mm_unpackhi_ps(x2Val, x3Val);
-
-    c0Val = _mm_mul_ps(a0Val, b0Val);
-    c1Val = _mm_mul_ps(a1Val, b1Val);
-    c2Val = _mm_mul_ps(a2Val, b2Val);
-    c3Val = _mm_mul_ps(a3Val, b3Val);
-
-    dotProdVal0 = _mm_add_ps(c0Val, dotProdVal0);
-    dotProdVal1 = _mm_add_ps(c1Val, dotProdVal1);
-    dotProdVal2 = _mm_add_ps(c2Val, dotProdVal2);
-    dotProdVal3 = _mm_add_ps(c3Val, dotProdVal3);
-
-    aPtr += 16;
-    bPtr += 8;
-  }
-
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal1);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal2);
-  dotProdVal0 = _mm_add_ps(dotProdVal0, dotProdVal3);
-
-  __VOLK_ATTR_ALIGNED(16) float dotProductVector[4];
-
-  _mm_store_ps(dotProductVector,dotProdVal0); // Store the results back into 
the dot product vector
-
-  *realpt = dotProductVector[0];
-  *imagpt = dotProductVector[1];
-  *realpt += dotProductVector[2];
-  *imagpt += dotProductVector[3];
-
-  number = sixteenthPoints*8;
-  for(;number < num_points; number++){
-    *realpt += ((*aPtr++) * (*bPtr));
-    *imagpt += ((*aPtr++) * (*bPtr++));
-  }
-
-  *result = *(lv_32fc_t*)(&res[0]);
-}
-
-#endif /*LV_HAVE_SSE*/
-
-
-#endif /*INCLUDED_volk_32fc_32f_dot_prod_32fc_H*/
diff --git a/volk/kernels/volk/volk_32fc_32f_multiply_32fc.h 
b/volk/kernels/volk/volk_32fc_32f_multiply_32fc.h
deleted file mode 100644
index 0f5a141..0000000
--- a/volk/kernels/volk/volk_32fc_32f_multiply_32fc.h
+++ /dev/null
@@ -1,213 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_32f_multiply_32fc_a_H
-#define INCLUDED_volk_32fc_32f_multiply_32fc_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies the input complex vector with the input float vector and 
store their results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector The complex vector to be multiplied
-    \param bVector The vectors containing the float values to be multiplied 
against each complex value in aVector
-    \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-  */
-static inline void volk_32fc_32f_multiply_32fc_a_avx(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float* bVector, unsigned int num_points)
-{
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m256 aVal1, aVal2, bVal, bVal1, bVal2, cVal1, cVal2;
-
-    __m256i permute_mask = _mm256_set_epi32(3, 3, 2, 2, 1, 1, 0, 0);
-
-    for(;number < eighthPoints; number++){
-
-      aVal1 = _mm256_load_ps((float *)aPtr);
-      aPtr += 4;
-
-      aVal2 = _mm256_load_ps((float *)aPtr);
-      aPtr += 4;
-
-      bVal = _mm256_load_ps(bPtr); // b0|b1|b2|b3|b4|b5|b6|b7
-      bPtr += 8;
-
-      bVal1 = _mm256_permute2f128_ps(bVal, bVal, 0x00); // 
b0|b1|b2|b3|b0|b1|b2|b3
-      bVal2 = _mm256_permute2f128_ps(bVal, bVal, 0x11); // 
b4|b5|b6|b7|b4|b5|b6|b7
-
-      bVal1 = _mm256_permutevar_ps(bVal1, permute_mask); // 
b0|b0|b1|b1|b2|b2|b3|b3
-      bVal2 = _mm256_permutevar_ps(bVal2, permute_mask); // 
b4|b4|b5|b5|b6|b6|b7|b7
-
-      cVal1 = _mm256_mul_ps(aVal1, bVal1);
-      cVal2 = _mm256_mul_ps(aVal2, bVal2);
-
-      _mm256_store_ps((float*)cPtr,cVal1); // Store the results back into the 
C container
-      cPtr += 4;
-
-      _mm256_store_ps((float*)cPtr,cVal2); // Store the results back into the 
C container
-      cPtr += 4;
-    }
-
-    number = eighthPoints * 8;
-    for(;number < num_points; ++number){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Multiplies the input complex vector with the input float vector and 
store their results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector The complex vector to be multiplied
-    \param bVector The vectors containing the float values to be multiplied 
against each complex value in aVector
-    \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-  */
-static inline void volk_32fc_32f_multiply_32fc_a_sse(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float* bVector, unsigned int num_points)
-{
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    const float* bPtr=  bVector;
-
-    __m128 aVal1, aVal2, bVal, bVal1, bVal2, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal1 = _mm_load_ps((const float*)aPtr);
-      aPtr += 2;
-
-      aVal2 = _mm_load_ps((const float*)aPtr);
-      aPtr += 2;
-
-      bVal = _mm_load_ps(bPtr);
-      bPtr += 4;
-
-      bVal1 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(1,1,0,0));
-      bVal2 = _mm_shuffle_ps(bVal, bVal, _MM_SHUFFLE(3,3,2,2));
-
-      cVal = _mm_mul_ps(aVal1, bVal1);
-
-      _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C 
container
-      cPtr += 2;
-
-      cVal = _mm_mul_ps(aVal2, bVal2);
-
-      _mm_store_ps((float*)cPtr,cVal); // Store the results back into the C 
container
-
-      cPtr += 2;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr);
-      bPtr++;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies the input complex vector with the input lv_32fc_t vector 
and store their results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector The complex vector to be multiplied
-    \param bVector The vectors containing the lv_32fc_t values to be 
multiplied against each complex value in aVector
-    \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-  */
-static inline void volk_32fc_32f_multiply_32fc_generic(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
-  lv_32fc_t* cPtr = cVector;
-  const lv_32fc_t* aPtr = aVector;
-  const float* bPtr=  bVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *cPtr++ = (*aPtr++) * (*bPtr++);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-  /*!
-    \brief Multiplies the input complex vector with the input lv_32fc_t vector 
and store their results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector The complex vector to be multiplied
-    \param bVector The vectors containing the lv_32fc_t values to be 
multiplied against each complex value in aVector
-    \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-  */
-static inline void volk_32fc_32f_multiply_32fc_neon(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
-  lv_32fc_t* cPtr = cVector;
-  const lv_32fc_t* aPtr = aVector;
-  const float* bPtr=  bVector;
-  unsigned int number = 0;
-  unsigned int quarter_points = num_points / 4;
-
-  float32x4x2_t inputVector, outputVector;
-  float32x4_t tapsVector;
-  for(number = 0; number < quarter_points; number++){
-    inputVector = vld2q_f32((float*)aPtr);
-    tapsVector = vld1q_f32(bPtr);
-
-    outputVector.val[0] = vmulq_f32(inputVector.val[0], tapsVector);
-    outputVector.val[1] = vmulq_f32(inputVector.val[1], tapsVector);
-
-    vst2q_f32((float*)cPtr, outputVector);
-    aPtr += 4;
-    bPtr += 4;
-    cPtr += 4;
-  }
-
-  for(number = quarter_points * 4; number < num_points; number++){
-    *cPtr++ = (*aPtr++) * (*bPtr++);
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_ORC
-  /*!
-    \brief Multiplies the input complex vector with the input lv_32fc_t vector 
and store their results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector The complex vector to be multiplied
-    \param bVector The vectors containing the lv_32fc_t values to be 
multiplied against each complex value in aVector
-    \param num_points The number of values in aVector and bVector to be 
multiplied together and stored into cVector
-  */
-extern void volk_32fc_32f_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float* bVector, unsigned int num_points);
-static inline void volk_32fc_32f_multiply_32fc_u_orc(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float* bVector, unsigned int num_points){
-    volk_32fc_32f_multiply_32fc_a_orc_impl(cVector, aVector, bVector, 
num_points);
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-#endif /* INCLUDED_volk_32fc_32f_multiply_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32fc_conjugate_32fc.h 
b/volk/kernels/volk/volk_32fc_conjugate_32fc.h
deleted file mode 100644
index 49d8a0f..0000000
--- a/volk/kernels/volk/volk_32fc_conjugate_32fc.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_conjugate_32fc_u_H
-#define INCLUDED_volk_32fc_conjugate_32fc_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-#include <float.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Takes the conjugate of a complex vector.
-    \param cVector The vector where the results will be stored
-    \param aVector Vector to be conjugated
-    \param num_points The number of complex values in aVector to be conjugated 
and stored into cVector
-  */
-static inline void volk_32fc_conjugate_32fc_u_avx(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m256 x;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
-
-    for(;number < quarterPoints; number++){
-
-      x = _mm256_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
-
-      x = _mm256_xor_ps(x, conjugator); // conjugate register
-
-      _mm256_storeu_ps((float*)c,x); // Store the results back into the C 
container
-
-      a += 4;
-      c += 4;
-    }
-
-    number = quarterPoints * 4;
-
-    for(;number < num_points; number++) {
-      *c++ = lv_conj(*a++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Takes the conjugate of a complex vector.
-    \param cVector The vector where the results will be stored
-    \param aVector Vector to be conjugated
-    \param num_points The number of complex values in aVector to be conjugated 
and stored into cVector
-  */
-static inline void volk_32fc_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    __m128 x;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
-
-    for(;number < halfPoints; number++){
-
-      x = _mm_loadu_ps((float*)a); // Load the complex data as ar,ai,br,bi
-
-      x = _mm_xor_ps(x, conjugator); // conjugate register
-
-      _mm_storeu_ps((float*)c,x); // Store the results back into the C 
container
-
-      a += 2;
-      c += 2;
-    }
-
-    if((num_points % 2) != 0) {
-      *c = lv_conj(*a);
-    }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Takes the conjugate of a complex vector.
-    \param cVector The vector where the results will be stored
-    \param aVector Vector to be conjugated
-    \param num_points The number of complex values in aVector to be conjugated 
and stored into cVector
-  */
-static inline void volk_32fc_conjugate_32fc_generic(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, unsigned int num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = lv_conj(*aPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32fc_conjugate_32fc_u_H */
-#ifndef INCLUDED_volk_32fc_conjugate_32fc_a_H
-#define INCLUDED_volk_32fc_conjugate_32fc_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-#include <float.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Takes the conjugate of a complex vector.
-    \param cVector The vector where the results will be stored
-    \param aVector Vector to be conjugated
-    \param num_points The number of complex values in aVector to be conjugated 
and stored into cVector
-  */
-static inline void volk_32fc_conjugate_32fc_a_avx(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m256 x;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
-
-    for(;number < quarterPoints; number++){
-
-      x = _mm256_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
-
-      x = _mm256_xor_ps(x, conjugator); // conjugate register
-
-      _mm256_store_ps((float*)c,x); // Store the results back into the C 
container
-
-      a += 4;
-      c += 4;
-    }
-
-    number = quarterPoints * 4;
-
-    for(;number < num_points; number++) {
-      *c++ = lv_conj(*a++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Takes the conjugate of a complex vector.
-    \param cVector The vector where the results will be stored
-    \param aVector Vector to be conjugated
-    \param num_points The number of complex values in aVector to be conjugated 
and stored into cVector
-  */
-static inline void volk_32fc_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    __m128 x;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
-
-    for(;number < halfPoints; number++){
-
-      x = _mm_load_ps((float*)a); // Load the complex data as ar,ai,br,bi
-
-      x = _mm_xor_ps(x, conjugator); // conjugate register
-
-      _mm_store_ps((float*)c,x); // Store the results back into the C container
-
-      a += 2;
-      c += 2;
-    }
-
-    if((num_points % 2) != 0) {
-      *c = lv_conj(*a);
-    }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-  /*!
-    \brief Takes the conjugate of a complex vector.
-    \param cVector The vector where the results will be stored
-    \param aVector Vector to be conjugated
-    \param num_points The number of complex values in aVector to be conjugated 
and stored into cVector
-  */
-static inline void volk_32fc_conjugate_32fc_a_neon(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, unsigned int num_points){
-    unsigned int number;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float32x4x2_t x;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    for(number=0; number < quarterPoints; number++){
-      __builtin_prefetch(a+4);
-      x = vld2q_f32((float*)a); // Load the complex data as ar,br,cr,dr; 
ai,bi,ci,di
-
-      // xor the imaginary lane
-      x.val[1] = vnegq_f32( x.val[1]);
-
-      vst2q_f32((float*)c,x); // Store the results back into the C container
-
-      a += 4;
-      c += 4;
-    }
-
-    for(number=quarterPoints*4; number < num_points; number++){
-      *c++ = lv_conj(*a++);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Takes the conjugate of a complex vector.
-    \param cVector The vector where the results will be stored
-    \param aVector Vector to be conjugated
-    \param num_points The number of complex values in aVector to be conjugated 
and stored into cVector
-  */
-static inline void volk_32fc_conjugate_32fc_a_generic(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, unsigned int num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = lv_conj(*aPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32fc_conjugate_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h 
b/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
deleted file mode 100644
index aed7f09..0000000
--- a/volk/kernels/volk/volk_32fc_deinterleave_32f_x2.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
-#define INCLUDED_volk_32fc_deinterleave_32f_x2_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Deinterleaves the complex vector into I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_32f_x2_a_avx(float* iBuffer, float* 
qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* iBufferPtr = iBuffer;
-  float* qBufferPtr = qBuffer;
-
-  unsigned int number = 0;
-  // Mask for real and imaginary parts
-  const unsigned int eighthPoints = num_points / 8;
-  __m256 cplxValue1, cplxValue2, complex1, complex2, iValue, qValue;
-  for(;number < eighthPoints; number++){
-
-    cplxValue1 = _mm256_load_ps(complexVectorPtr);
-    complexVectorPtr += 8;
-
-    cplxValue2 = _mm256_load_ps(complexVectorPtr);
-    complexVectorPtr += 8;
-
-    complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
-    complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
-
-    // Arrange in i1i2i3i4 format
-    iValue = _mm256_shuffle_ps(complex1, complex2, 0x88);
-    // Arrange in q1q2q3q4 format
-    qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
-
-    _mm256_store_ps(iBufferPtr, iValue);
-    _mm256_store_ps(qBufferPtr, qValue);
-
-    iBufferPtr += 8;
-    qBufferPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Deinterleaves the complex vector into I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_32f_x2_a_sse(float* iBuffer, float* 
qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* iBufferPtr = iBuffer;
-  float* qBufferPtr = qBuffer;
-
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 cplxValue1, cplxValue2, iValue, qValue;
-  for(;number < quarterPoints; number++){
-
-    cplxValue1 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    cplxValue2 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    // Arrange in i1i2i3i4 format
-    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-    // Arrange in q1q2q3q4 format
-    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-    _mm_store_ps(iBufferPtr, iValue);
-    _mm_store_ps(qBufferPtr, qValue);
-
-    iBufferPtr += 4;
-    qBufferPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Deinterleaves the complex vector into I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_32f_x2_neon(float* iBuffer, float* 
qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  unsigned int quarter_points = num_points / 4;
-  const float* complexVectorPtr = (float*)complexVector;
-  float* iBufferPtr = iBuffer;
-  float* qBufferPtr = qBuffer;
-  float32x4x2_t complexInput;
-
-  for(number = 0; number < quarter_points; number++){
-    complexInput = vld2q_f32(complexVectorPtr);
-    vst1q_f32( iBufferPtr, complexInput.val[0] );
-    vst1q_f32( qBufferPtr, complexInput.val[1] );
-    complexVectorPtr += 8;
-    iBufferPtr += 4;
-    qBufferPtr += 4;
-  }
-
-  for(number = quarter_points*4; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex vector into I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_32f_x2_generic(float* iBuffer, 
float* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* iBufferPtr = iBuffer;
-  float* qBufferPtr = qBuffer;
-  unsigned int number;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_deinterleave_32f_x2_a_H */
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_64f_x2.h 
b/volk/kernels/volk/volk_32fc_deinterleave_64f_x2.h
deleted file mode 100644
index 9b872e0..0000000
--- a/volk/kernels/volk/volk_32fc_deinterleave_64f_x2.h
+++ /dev/null
@@ -1,280 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
-#define INCLUDED_volk_32fc_deinterleave_64f_x2_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_64f_x2_u_avx(double* iBuffer, 
double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    double* iBufferPtr = iBuffer;
-    double* qBufferPtr = qBuffer;
-
-    const unsigned int quarterPoints = num_points / 4;
-    __m256 cplxValue;
-    __m128 complexH, complexL, fVal;
-    __m256d dVal;
-
-    for(;number < quarterPoints; number++){
-
-      cplxValue = _mm256_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      complexH = _mm256_extractf128_ps(cplxValue, 1);
-      complexL = _mm256_extractf128_ps(cplxValue, 0);
-
-      // Arrange in i1i2i1i2 format
-      fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2,0,2,0));
-      dVal = _mm256_cvtps_pd(fVal);
-      _mm256_storeu_pd(iBufferPtr, dVal);
-
-      // Arrange in q1q2q1q2 format
-      fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3,1,3,1));
-      dVal = _mm256_cvtps_pd(fVal);
-      _mm256_storeu_pd(qBufferPtr, dVal);
-
-      iBufferPtr += 4;
-      qBufferPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      *iBufferPtr++ = *complexVectorPtr++;
-      *qBufferPtr++ = *complexVectorPtr++;
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_64f_x2_u_sse2(double* iBuffer, 
double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    double* iBufferPtr = iBuffer;
-    double* qBufferPtr = qBuffer;
-
-    const unsigned int halfPoints = num_points / 2;
-    __m128 cplxValue, fVal;
-    __m128d dVal;
-
-    for(;number < halfPoints; number++){
-
-      cplxValue = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      // Arrange in i1i2i1i2 format
-      fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
-      dVal = _mm_cvtps_pd(fVal);
-      _mm_storeu_pd(iBufferPtr, dVal);
-
-      // Arrange in q1q2q1q2 format
-      fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3,1,3,1));
-      dVal = _mm_cvtps_pd(fVal);
-      _mm_storeu_pd(qBufferPtr, dVal);
-
-      iBufferPtr += 2;
-      qBufferPtr += 2;
-    }
-
-    number = halfPoints * 2;
-    for(; number < num_points; number++){
-      *iBufferPtr++ = *complexVectorPtr++;
-      *qBufferPtr++ = *complexVectorPtr++;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_64f_x2_generic(double* iBuffer, 
double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const float* complexVectorPtr = (float*)complexVector;
-  double* iBufferPtr = iBuffer;
-  double* qBufferPtr = qBuffer;
-
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = (double)*complexVectorPtr++;
-    *qBufferPtr++ = (double)*complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_u_H */
-#ifndef INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
-#define INCLUDED_volk_32fc_deinterleave_64f_x2_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_64f_x2_a_avx(double* iBuffer, 
double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    double* iBufferPtr = iBuffer;
-    double* qBufferPtr = qBuffer;
-
-    const unsigned int quarterPoints = num_points / 4;
-    __m256 cplxValue;
-    __m128 complexH, complexL, fVal;
-    __m256d dVal;
-
-    for(;number < quarterPoints; number++){
-
-      cplxValue = _mm256_load_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      complexH = _mm256_extractf128_ps(cplxValue, 1);
-      complexL = _mm256_extractf128_ps(cplxValue, 0);
-
-      // Arrange in i1i2i1i2 format
-      fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(2,0,2,0));
-      dVal = _mm256_cvtps_pd(fVal);
-      _mm256_store_pd(iBufferPtr, dVal);
-
-      // Arrange in q1q2q1q2 format
-      fVal = _mm_shuffle_ps(complexL, complexH, _MM_SHUFFLE(3,1,3,1));
-      dVal = _mm256_cvtps_pd(fVal);
-      _mm256_store_pd(qBufferPtr, dVal);
-
-      iBufferPtr += 4;
-      qBufferPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      *iBufferPtr++ = *complexVectorPtr++;
-      *qBufferPtr++ = *complexVectorPtr++;
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_64f_x2_a_sse2(double* iBuffer, 
double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    double* iBufferPtr = iBuffer;
-    double* qBufferPtr = qBuffer;
-
-    const unsigned int halfPoints = num_points / 2;
-    __m128 cplxValue, fVal;
-    __m128d dVal;
-
-    for(;number < halfPoints; number++){
-
-      cplxValue = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      // Arrange in i1i2i1i2 format
-      fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
-      dVal = _mm_cvtps_pd(fVal);
-      _mm_store_pd(iBufferPtr, dVal);
-
-      // Arrange in q1q2q1q2 format
-      fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(3,1,3,1));
-      dVal = _mm_cvtps_pd(fVal);
-      _mm_store_pd(qBufferPtr, dVal);
-
-      iBufferPtr += 2;
-      qBufferPtr += 2;
-    }
-
-    number = halfPoints * 2;
-    for(; number < num_points; number++){
-      *iBufferPtr++ = *complexVectorPtr++;
-      *qBufferPtr++ = *complexVectorPtr++;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the lv_32fc_t vector into double I & Q vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_64f_x2_a_generic(double* iBuffer, 
double* qBuffer, const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const float* complexVectorPtr = (float*)complexVector;
-  double* iBufferPtr = iBuffer;
-  double* qBufferPtr = qBuffer;
-
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = (double)*complexVectorPtr++;
-    *qBufferPtr++ = (double)*complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_deinterleave_64f_x2_a_H */
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h 
b/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
deleted file mode 100644
index a590c83..0000000
--- a/volk/kernels/volk/volk_32fc_deinterleave_imag_32f.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
-#define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Deinterleaves the complex vector into Q vector data
-  \param complexVector The complex input vector
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_imag_32f_a_avx(float* qBuffer, const 
lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const unsigned int eighthPoints = num_points / 8;
-  const float* complexVectorPtr = (const float*)complexVector;
-  float* qBufferPtr = qBuffer;
-
-  __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
-  for(;number < eighthPoints; number++){
-
-    cplxValue1 = _mm256_load_ps(complexVectorPtr);
-    complexVectorPtr += 8;
-
-    cplxValue2 = _mm256_load_ps(complexVectorPtr);
-    complexVectorPtr += 8;
-
-    complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
-    complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
-
-    // Arrange in q1q2q3q4 format
-    qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
-
-    _mm256_store_ps(qBufferPtr, qValue);
-
-    qBufferPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Deinterleaves the complex vector into Q vector data
-  \param complexVector The complex input vector
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer, const 
lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* complexVectorPtr = (const float*)complexVector;
-  float* qBufferPtr = qBuffer;
-
-  __m128 cplxValue1, cplxValue2, iValue;
-  for(;number < quarterPoints; number++){
-
-    cplxValue1 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    cplxValue2 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    // Arrange in q1q2q3q4 format
-    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-    _mm_store_ps(qBufferPtr, iValue);
-
-    qBufferPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Deinterleaves the complex vector into Q vector data
-  \param complexVector The complex input vector
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_imag_32f_neon(float* qBuffer, const 
lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  unsigned int quarter_points = num_points / 4;
-  const float* complexVectorPtr = (float*)complexVector;
-  float* qBufferPtr = qBuffer;
-  float32x4x2_t complexInput;
-
-  for(number = 0; number < quarter_points; number++){
-    complexInput = vld2q_f32(complexVectorPtr);
-    vst1q_f32( qBufferPtr, complexInput.val[1] );
-    complexVectorPtr += 8;
-    qBufferPtr += 4;
-  }
-
-  for(number = quarter_points*4; number < num_points; number++){
-    complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex vector into Q vector data
-  \param complexVector The complex input vector
-  \param qBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_imag_32f_generic(float* qBuffer, 
const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const float* complexVectorPtr = (float*)complexVector;
-  float* qBufferPtr = qBuffer;
-  for(number = 0; number < num_points; number++){
-    complexVectorPtr++;
-    *qBufferPtr++ = *complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h 
b/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
deleted file mode 100644
index c0e8d8f..0000000
--- a/volk/kernels/volk/volk_32fc_deinterleave_real_32f.h
+++ /dev/null
@@ -1,116 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
-#define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Deinterleaves the complex vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer, const 
lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* complexVectorPtr = (const float*)complexVector;
-  float* iBufferPtr = iBuffer;
-
-  __m128 cplxValue1, cplxValue2, iValue;
-  for(;number < quarterPoints; number++){
-
-    cplxValue1 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    cplxValue2 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    // Arrange in i1i2i3i4 format
-    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-
-    _mm_store_ps(iBufferPtr, iValue);
-
-    iBufferPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_real_32f_generic(float* iBuffer, 
const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const float* complexVectorPtr = (float*)complexVector;
-  float* iBufferPtr = iBuffer;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Deinterleaves the complex vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer, const 
lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  unsigned int quarter_points = num_points / 4;
-  const float* complexVectorPtr = (float*)complexVector;
-  float* iBufferPtr = iBuffer;
-  float32x4x2_t complexInput;
-
-  for(number = 0; number < quarter_points; number++){
-    complexInput = vld2q_f32(complexVectorPtr);
-    vst1q_f32( iBufferPtr, complexInput.val[0] );
-    complexVectorPtr += 8;
-    iBufferPtr += 4;
-  }
-
-  for(number = quarter_points*4; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
diff --git a/volk/kernels/volk/volk_32fc_deinterleave_real_64f.h 
b/volk/kernels/volk/volk_32fc_deinterleave_real_64f.h
deleted file mode 100644
index eb1df38..0000000
--- a/volk/kernels/volk/volk_32fc_deinterleave_real_64f.h
+++ /dev/null
@@ -1,88 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_deinterleave_real_64f_a_H
-#define INCLUDED_volk_32fc_deinterleave_real_64f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Deinterleaves the complex vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_real_64f_a_sse2(double* iBuffer, 
const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-
-  const float* complexVectorPtr = (float*)complexVector;
-  double* iBufferPtr = iBuffer;
-
-  const unsigned int halfPoints = num_points / 2;
-  __m128 cplxValue, fVal;
-  __m128d dVal;
-  for(;number < halfPoints; number++){
-
-    cplxValue = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    // Arrange in i1i2i1i2 format
-    fVal = _mm_shuffle_ps(cplxValue, cplxValue, _MM_SHUFFLE(2,0,2,0));
-    dVal = _mm_cvtps_pd(fVal);
-    _mm_store_pd(iBufferPtr, dVal);
-
-    iBufferPtr += 2;
-  }
-
-  number = halfPoints * 2;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = (double)*complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_deinterleave_real_64f_generic(double* iBuffer, 
const lv_32fc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const float* complexVectorPtr = (float*)complexVector;
-  double* iBufferPtr = iBuffer;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = (double)*complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_deinterleave_real_64f_a_H */
diff --git a/volk/kernels/volk/volk_32fc_index_max_16u.h 
b/volk/kernels/volk/volk_32fc_index_max_16u.h
deleted file mode 100644
index bc47f79..0000000
--- a/volk/kernels/volk/volk_32fc_index_max_16u.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_index_max_16u_a_H
-#define INCLUDED_volk_32fc_index_max_16u_a_H
-
-#include <volk/volk_common.h>
-#include<inttypes.h>
-#include<stdio.h>
-#include<volk/volk_complex.h>
-
-#ifdef LV_HAVE_SSE3
-#include<xmmintrin.h>
-#include<pmmintrin.h>
-
-
-static inline void volk_32fc_index_max_16u_a_sse3(unsigned int* target, 
lv_32fc_t* src0, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-
-  union bit128 holderf;
-  union bit128 holderi;
-  float sq_dist = 0.0;
-
-
-
-
-  union bit128 xmm5, xmm4;
-  __m128 xmm1, xmm2, xmm3;
-  __m128i xmm8, xmm11, xmm12, xmmfive, xmmfour, xmm9, holder0, holder1, xmm10;
-
-  xmm5.int_vec = xmmfive = _mm_setzero_si128();
-  xmm4.int_vec = xmmfour = _mm_setzero_si128();
-  holderf.int_vec = holder0 = _mm_setzero_si128();
-  holderi.int_vec = holder1 = _mm_setzero_si128();
-
-
-  int bound = num_bytes >> 5;
-  int leftovers0 = (num_bytes >> 4) & 1;
-  int leftovers1 = (num_bytes >> 3) & 1;
-  int i = 0;
-
-
-  xmm8 = _mm_set_epi32(3, 2, 1, 0);//remember the crazy reverse order!
-  xmm9 = xmm8 = _mm_setzero_si128();
-  xmm10 = _mm_set_epi32(4, 4, 4, 4);
-  xmm3 = _mm_setzero_ps();
-;
-
-  //printf("%f, %f, %f, %f\n", ((float*)&xmm10)[0], ((float*)&xmm10)[1], 
((float*)&xmm10)[2], ((float*)&xmm10)[3]);
-
-  for(; i < bound; ++i) {
-
-    xmm1 = _mm_load_ps((float*)src0);
-    xmm2 = _mm_load_ps((float*)&src0[2]);
-
-
-    src0 += 4;
-
-
-    xmm1 = _mm_mul_ps(xmm1, xmm1);
-    xmm2 = _mm_mul_ps(xmm2, xmm2);
-
-
-    xmm1 = _mm_hadd_ps(xmm1, xmm2);
-
-    xmm3 = _mm_max_ps(xmm1, xmm3);
-
-    xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
-    xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
-
-
-
-    xmm11 = _mm_and_si128(xmm8, xmm5.int_vec);
-    xmm12 = _mm_and_si128(xmm9, xmm4.int_vec);
-
-    xmm9 = _mm_add_epi32(xmm11,  xmm12);
-
-    xmm8 = _mm_add_epi32(xmm8, xmm10);
-
-
-    //printf("%f, %f, %f, %f\n", ((float*)&xmm3)[0], ((float*)&xmm3)[1], 
((float*)&xmm3)[2], ((float*)&xmm3)[3]);
-    //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm10)[0], 
((uint32_t*)&xmm10)[1], ((uint32_t*)&xmm10)[2], ((uint32_t*)&xmm10)[3]);
-
-  }
-
-
-  for(i = 0; i < leftovers0; ++i) {
-
-
-    xmm2 = _mm_load_ps((float*)src0);
-
-    xmm1 = _mm_movelh_ps(bit128_p(&xmm8)->float_vec, 
bit128_p(&xmm8)->float_vec);
-    xmm8 = bit128_p(&xmm1)->int_vec;
-
-    xmm2 = _mm_mul_ps(xmm2, xmm2);
-
-    src0 += 2;
-
-    xmm1 = _mm_hadd_ps(xmm2, xmm2);
-
-    xmm3 = _mm_max_ps(xmm1, xmm3);
-
-    xmm10 = _mm_set_epi32(2, 2, 2, 2);//load1_ps((float*)&init[2]);
-
-
-    xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
-    xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
-
-
-
-    xmm11 = _mm_and_si128(xmm8, xmm5.int_vec);
-    xmm12 = _mm_and_si128(xmm9, xmm4.int_vec);
-
-    xmm9 = _mm_add_epi32(xmm11, xmm12);
-
-    xmm8 = _mm_add_epi32(xmm8, xmm10);
-    //printf("egads%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], 
((uint32_t*)&xmm9)[1], ((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
-
-  }
-
-
-
-
-  for(i = 0; i < leftovers1; ++i) {
-    //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], 
((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
-
-
-    sq_dist = lv_creal(src0[0]) * lv_creal(src0[0]) + lv_cimag(src0[0]) * 
lv_cimag(src0[0]);
-
-    xmm2 = _mm_load1_ps(&sq_dist);
-
-    xmm1 = xmm3;
-
-    xmm3 = _mm_max_ss(xmm3, xmm2);
-
-
-
-    xmm4.float_vec = _mm_cmplt_ps(xmm1, xmm3);
-    xmm5.float_vec = _mm_cmpeq_ps(xmm1, xmm3);
-
-
-    xmm8 = _mm_shuffle_epi32(xmm8, 0x00);
-
-    xmm11 = _mm_and_si128(xmm8, xmm4.int_vec);
-    xmm12 = _mm_and_si128(xmm9, xmm5.int_vec);
-
-
-    xmm9 = _mm_add_epi32(xmm11, xmm12);
-
-  }
-
-  //printf("%f, %f, %f, %f\n", ((float*)&xmm3)[0], ((float*)&xmm3)[1], 
((float*)&xmm3)[2], ((float*)&xmm3)[3]);
-
-  //printf("%u, %u, %u, %u\n", ((uint32_t*)&xmm9)[0], ((uint32_t*)&xmm9)[1], 
((uint32_t*)&xmm9)[2], ((uint32_t*)&xmm9)[3]);
-
-  _mm_store_ps((float*)&(holderf.f), xmm3);
-  _mm_store_si128(&(holderi.int_vec), xmm9);
-
-  target[0] = holderi.i[0];
-  sq_dist = holderf.f[0];
-  target[0] = (holderf.f[1] > sq_dist) ? holderi.i[1] : target[0];
-  sq_dist = (holderf.f[1] > sq_dist) ? holderf.f[1] : sq_dist;
-  target[0] = (holderf.f[2] > sq_dist) ? holderi.i[2] : target[0];
-  sq_dist = (holderf.f[2] > sq_dist) ? holderf.f[2] : sq_dist;
-  target[0] = (holderf.f[3] > sq_dist) ? holderi.i[3] : target[0];
-  sq_dist = (holderf.f[3] > sq_dist) ? holderf.f[3] : sq_dist;
-
-
-
-  /*
-  float placeholder = 0.0;
-  uint32_t temp0, temp1;
-  unsigned int g0 = (((float*)&xmm3)[0] > ((float*)&xmm3)[1]);
-  unsigned int l0 = g0 ^ 1;
-
-  unsigned int g1 = (((float*)&xmm3)[1] > ((float*)&xmm3)[2]);
-  unsigned int l1 = g1 ^ 1;
-
-  temp0 = g0 * ((uint32_t*)&xmm9)[0] + l0 * ((uint32_t*)&xmm9)[1];
-  temp1 = g0 * ((uint32_t*)&xmm9)[2] + l0 * ((uint32_t*)&xmm9)[3];
-  sq_dist = g0 * ((float*)&xmm3)[0] + l0 * ((float*)&xmm3)[1];
-  placeholder = g0 * ((float*)&xmm3)[2] + l0 * ((float*)&xmm3)[3];
-
-  g0 = (sq_dist > placeholder);
-  l0 = g0 ^ 1;
-  target[0] = g0 * temp0 + l0 * temp1;
-  */
-
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_index_max_16u_generic(unsigned int* target, 
lv_32fc_t* src0, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-
-  float sq_dist = 0.0;
-  float max = 0.0;
-  unsigned int index = 0;
-
-  unsigned int i = 0;
-
-  for(; i < num_bytes >> 3; ++i) {
-
-    sq_dist = lv_creal(src0[i]) * lv_creal(src0[i]) + lv_cimag(src0[i]) * 
lv_cimag(src0[i]);
-
-    index = sq_dist > max ? i : index;
-    max = sq_dist > max ? sq_dist : max;
-
-
-  }
-  target[0] = index;
-
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#endif /*INCLUDED_volk_32fc_index_max_16u_a_H*/
diff --git a/volk/kernels/volk/volk_32fc_magnitude_32f.h 
b/volk/kernels/volk/volk_32fc_magnitude_32f.h
deleted file mode 100644
index d057b70..0000000
--- a/volk/kernels/volk/volk_32fc_magnitude_32f.h
+++ /dev/null
@@ -1,472 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
-#define INCLUDED_volk_32fc_magnitude_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_u_avx(float* magnitudeVector, const 
lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m256 cplxValue1, cplxValue2, complex1, complex2, result;
-    for(;number < eighthPoints; number++){
-      cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
-      cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-      complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
-      complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
-
-      result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
-
-      result = _mm256_sqrt_ps(result);
-
-      _mm256_storeu_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag));
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector, 
const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m128 cplxValue1, cplxValue2, result;
-    for(;number < quarterPoints; number++){
-      cplxValue1 = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue2 = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
-      cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-      result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
-
-      result = _mm_sqrt_ps(result);
-
-      _mm_storeu_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag));
-    }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector, const 
lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
-    for(;number < quarterPoints; number++){
-      cplxValue1 = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue2 = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      // Arrange in i1i2i3i4 format
-      iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-      // Arrange in q1q2q3q4 format
-      qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-      iValue = _mm_mul_ps(iValue, iValue); // Square the I values
-      qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
-
-      result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
-
-      result = _mm_sqrt_ps(result);
-
-      _mm_storeu_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-       float val1Real = *complexVectorPtr++;
-       float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag));
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector, 
const lv_32fc_t* complexVector, unsigned int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* magnitudeVectorPtr = magnitudeVector;
-  unsigned int number = 0;
-  for(number = 0; number < num_points; number++){
-    const float real = *complexVectorPtr++;
-    const float imag = *complexVectorPtr++;
-    *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
-#ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
-#define INCLUDED_volk_32fc_magnitude_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_a_avx(float* magnitudeVector, const 
lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m256 cplxValue1, cplxValue2, complex1, complex2, result;
-    for(;number < eighthPoints; number++){
-      cplxValue1 = _mm256_load_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cplxValue2 = _mm256_load_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
-      cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-      complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
-      complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
-
-      result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
-
-      result = _mm256_sqrt_ps(result);
-
-      _mm256_store_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag));
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector, 
const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m128 cplxValue1, cplxValue2, result;
-    for(;number < quarterPoints; number++){
-      cplxValue1 = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue2 = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
-      cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-      result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
-
-      result = _mm_sqrt_ps(result);
-
-      _mm_store_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag));
-    }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector, const 
lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
-    for(;number < quarterPoints; number++){
-      cplxValue1 = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue2 = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      // Arrange in i1i2i3i4 format
-      iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-      // Arrange in q1q2q3q4 format
-      qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-      iValue = _mm_mul_ps(iValue, iValue); // Square the I values
-      qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
-
-      result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
-
-      result = _mm_sqrt_ps(result);
-
-      _mm_store_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-       float val1Real = *complexVectorPtr++;
-       float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * 
val1Imag));
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector, 
const lv_32fc_t* complexVector, unsigned int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* magnitudeVectorPtr = magnitudeVector;
-  unsigned int number = 0;
-  for(number = 0; number < num_points; number++){
-    const float real = *complexVectorPtr++;
-    const float imag = *complexVectorPtr++;
-    *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector, const 
lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number;
-    unsigned int quarter_points = num_points / 4;
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    float32x4x2_t complex_vec;
-    float32x4_t magnitude_vec;
-    for(number = 0; number < quarter_points; number++){
-        complex_vec = vld2q_f32(complexVectorPtr);
-        complex_vec.val[0] = vmulq_f32(complex_vec.val[0], complex_vec.val[0]);
-        magnitude_vec = vmlaq_f32(complex_vec.val[0], complex_vec.val[1], 
complex_vec.val[1]);
-    magnitude_vec = vrsqrteq_f32(magnitude_vec);
-    magnitude_vec = vrecpeq_f32( magnitude_vec ); // no plain ol' sqrt
-        vst1q_f32(magnitudeVectorPtr, magnitude_vec);
-
-        complexVectorPtr += 8;
-        magnitudeVectorPtr += 4;
-    }
-
-    for(number = quarter_points*4; number < num_points; number++){
-      const float real = *complexVectorPtr++;
-      const float imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_NEON
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-
-    This is an approximation from "Streamlining Digital Signal Processing" by
-    Richard Lyons. Apparently max error is about 1% and mean error is about 
0.6%.
-    The basic idea is to do a weighted sum of the abs. value of imag and real 
parts
-    where weight A is always assigned to max(imag, real) and B is always 
min(imag,real).
-    There are two pairs of cofficients chosen based on whether min <= 0.4142 
max.
-    This method is called equiripple-error magnitude estimation proposed by 
Filip in '73
-
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_32f_neon_fancy_sweet(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number;
-    unsigned int quarter_points = num_points / 4;
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    const float threshold = 0.4142135;
-
-    float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
-    a_high = vdupq_n_f32( 0.84 );
-    b_high = vdupq_n_f32( 0.561);
-    a_low  = vdupq_n_f32( 0.99 );
-    b_low  = vdupq_n_f32( 0.197);
-
-    uint32x4_t comp0, comp1;
-
-    float32x4x2_t complex_vec;
-    float32x4_t min_vec, max_vec, magnitude_vec;
-    float32x4_t real_abs, imag_abs;
-    for(number = 0; number < quarter_points; number++){
-        complex_vec = vld2q_f32(complexVectorPtr);
-
-        real_abs = vabsq_f32(complex_vec.val[0]);
-        imag_abs = vabsq_f32(complex_vec.val[1]);
-
-        min_vec = vminq_f32(real_abs, imag_abs);
-        max_vec = vmaxq_f32(real_abs, imag_abs);
-
-        // effective branch to choose coefficient pair.
-        comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
-        comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
-
-        // and 0s or 1s with coefficients from previous effective branch
-        a_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, 
(int32x4_t)a_high), vandq_s32((int32x4_t)comp1, (int32x4_t)a_low));
-        b_vec = (float32x4_t)vaddq_s32(vandq_s32((int32x4_t)comp0, 
(int32x4_t)b_high), vandq_s32((int32x4_t)comp1, (int32x4_t)b_low));
-
-        // coefficients chosen, do the weighted sum
-        min_vec = vmulq_f32(min_vec, b_vec);
-        max_vec = vmulq_f32(max_vec, a_vec);
-
-        magnitude_vec = vaddq_f32(min_vec, max_vec);
-        vst1q_f32(magnitudeVectorPtr, magnitude_vec);
-
-        complexVectorPtr += 8;
-        magnitudeVectorPtr += 4;
-    }
-
-    for(number = quarter_points*4; number < num_points; number++){
-      const float real = *complexVectorPtr++;
-      const float imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = sqrtf((real*real) + (imag*imag));
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-
-#ifdef LV_HAVE_ORC
-  /*!
-    \brief Calculates the magnitude of the complexVector and stores the 
results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector, const 
lv_32fc_t* complexVector, unsigned int num_points);
-static inline void volk_32fc_magnitude_32f_u_orc(float* magnitudeVector, const 
lv_32fc_t* complexVector, unsigned int num_points){
-    volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, 
num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
diff --git a/volk/kernels/volk/volk_32fc_magnitude_squared_32f.h 
b/volk/kernels/volk/volk_32fc_magnitude_squared_32f.h
deleted file mode 100644
index ed8eac6..0000000
--- a/volk/kernels/volk/volk_32fc_magnitude_squared_32f.h
+++ /dev/null
@@ -1,380 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_u_H
-#define INCLUDED_volk_32fc_magnitude_squared_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_u_avx(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m256 cplxValue1, cplxValue2, complex1, complex2, result;
-    for(;number < eighthPoints; number++){
-      cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
-      cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-      complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
-      complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
-
-      result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
-
-      _mm256_storeu_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_u_sse3(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m128 cplxValue1, cplxValue2, result;
-    for(;number < quarterPoints; number++){
-      cplxValue1 = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue2 = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
-      cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-      result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
-
-      _mm_storeu_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
-    }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_u_sse(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
-    for(;number < quarterPoints; number++){
-      cplxValue1 = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue2 = _mm_loadu_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      // Arrange in i1i2i3i4 format
-      iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-      // Arrange in q1q2q3q4 format
-      qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-      iValue = _mm_mul_ps(iValue, iValue); // Square the I values
-      qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
-
-      result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
-
-      _mm_storeu_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-       float val1Real = *complexVectorPtr++;
-       float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_generic(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* magnitudeVectorPtr = magnitudeVector;
-  unsigned int number = 0;
-  for(number = 0; number < num_points; number++){
-    const float real = *complexVectorPtr++;
-    const float imag = *complexVectorPtr++;
-    *magnitudeVectorPtr++ = (real*real) + (imag*imag);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
-#ifndef INCLUDED_volk_32fc_magnitude_squared_32f_a_H
-#define INCLUDED_volk_32fc_magnitude_squared_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_a_avx(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int eighthPoints = num_points / 8;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m256 cplxValue1, cplxValue2, complex1, complex2, result;
-    for(;number < eighthPoints; number++){
-      cplxValue1 = _mm256_load_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cplxValue2 = _mm256_load_ps(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
-      cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-      complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
-      complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
-
-      result = _mm256_hadd_ps(complex1, complex2); // Add the I2 and Q2 values
-
-      _mm256_store_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_a_sse3(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m128 cplxValue1, cplxValue2, result;
-    for(;number < quarterPoints; number++){
-      cplxValue1 = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue2 = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
-      cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-      result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
-
-      _mm_store_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
-    }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_a_sse(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    __m128 cplxValue1, cplxValue2, iValue, qValue, result;
-    for(;number < quarterPoints; number++){
-      cplxValue1 = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      cplxValue2 = _mm_load_ps(complexVectorPtr);
-      complexVectorPtr += 4;
-
-      // Arrange in i1i2i3i4 format
-      iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-      // Arrange in q1q2q3q4 format
-      qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-      iValue = _mm_mul_ps(iValue, iValue); // Square the I values
-      qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
-
-      result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
-
-      _mm_store_ps(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-       float val1Real = *complexVectorPtr++;
-       float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-//
-
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_neon(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    const float* complexVectorPtr = (float*)complexVector;
-    float* magnitudeVectorPtr = magnitudeVector;
-
-    float32x4x2_t cmplx_val;
-    float32x4_t result;
-    for(;number < quarterPoints; number++){
-      cmplx_val = vld2q_f32(complexVectorPtr);
-      complexVectorPtr += 8;
-
-      cmplx_val.val[0] = vmulq_f32(cmplx_val.val[0], cmplx_val.val[0]); // 
Square the values
-      cmplx_val.val[1] = vmulq_f32(cmplx_val.val[1], cmplx_val.val[1]); // 
Square the values
-
-      result = vaddq_f32(cmplx_val.val[0], cmplx_val.val[1]); // Add the I2 
and Q2 values
-
-      vst1q_f32(magnitudeVectorPtr, result);
-      magnitudeVectorPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      float val1Real = *complexVectorPtr++;
-      float val1Imag = *complexVectorPtr++;
-      *magnitudeVectorPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Calculates the magnitude squared of the complexVector and stores 
the results in the magnitudeVector
-    \param complexVector The vector containing the complex input values
-    \param magnitudeVector The vector containing the real output values
-    \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-  */
-static inline void volk_32fc_magnitude_squared_32f_a_generic(float* 
magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* magnitudeVectorPtr = magnitudeVector;
-  unsigned int number = 0;
-  for(number = 0; number < num_points; number++){
-    const float real = *complexVectorPtr++;
-    const float imag = *complexVectorPtr++;
-    *magnitudeVectorPtr++ = (real*real) + (imag*imag);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32f_atan2_32f.h 
b/volk/kernels/volk/volk_32fc_s32f_atan2_32f.h
deleted file mode 100644
index 2e9221b..0000000
--- a/volk/kernels/volk/volk_32fc_s32f_atan2_32f.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32f_atan2_32f_a_H
-#define INCLUDED_volk_32fc_s32f_atan2_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-/*!
-  \brief performs the atan2 on the input vector and stores the results in the 
output vector.
-  \param outputVector The byte-aligned vector where the results will be stored.
-  \param inputVector The byte-aligned input vector containing interleaved IQ 
data (I = cos, Q = sin).
-  \param normalizeFactor The atan2 results will be divided by this 
normalization factor.
-  \param num_points The number of complex values in the input vector.
-*/
-static inline void volk_32fc_s32f_atan2_32f_a_sse4_1(float* outputVector,  
const lv_32fc_t* complexVector, const float normalizeFactor, unsigned int 
num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* outPtr = outputVector;
-
-  unsigned int number = 0;
-  const float invNormalizeFactor = 1.0 / normalizeFactor;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 testVector = _mm_set_ps1(2*M_PI);
-  __m128 correctVector = _mm_set_ps1(M_PI);
-  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
-  __m128 phase;
-  __m128 complex1, complex2, iValue, qValue;
-  __m128 keepMask;
-
-  for (; number < quarterPoints; number++) {
-    // Load IQ data:
-    complex1 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-    complex2 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-    // Deinterleave IQ data:
-    iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
-    qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
-    // Arctan to get phase:
-    phase = atan2f4(qValue, iValue);
-    // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
-    // Compare to 2pi:
-    keepMask = _mm_cmpneq_ps(phase,testVector);
-    phase = _mm_blendv_ps(correctVector, phase, keepMask);
-    // done with above correction.
-    phase = _mm_mul_ps(phase, vNormalizeFactor);
-    _mm_store_ps((float*)outPtr, phase);
-    outPtr += 4;
-  }
-  number = quarterPoints * 4;
-#endif /* LV_HAVE_SIMDMATH_H */
-
-  for (; number < num_points; number++) {
-    const float real = *complexVectorPtr++;
-    const float imag = *complexVectorPtr++;
-    *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
-  }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-/*!
-  \brief performs the atan2 on the input vector and stores the results in the 
output vector.
-  \param outputVector The byte-aligned vector where the results will be stored.
-  \param inputVector The byte-aligned input vector containing interleaved IQ 
data (I = cos, Q = sin).
-  \param normalizeFactor The atan2 results will be divided by this 
normalization factor.
-  \param num_points The number of complex values in the input vector.
-*/
-static inline void volk_32fc_s32f_atan2_32f_a_sse(float* outputVector,  const 
lv_32fc_t* complexVector, const float normalizeFactor, unsigned int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  float* outPtr = outputVector;
-
-  unsigned int number = 0;
-  const float invNormalizeFactor = 1.0 / normalizeFactor;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 testVector = _mm_set_ps1(2*M_PI);
-  __m128 correctVector = _mm_set_ps1(M_PI);
-  __m128 vNormalizeFactor = _mm_set_ps1(invNormalizeFactor);
-  __m128 phase;
-  __m128 complex1, complex2, iValue, qValue;
-  __m128 mask;
-  __m128 keepMask;
-
-  for (; number < quarterPoints; number++) {
-    // Load IQ data:
-    complex1 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-    complex2 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-    // Deinterleave IQ data:
-    iValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(2,0,2,0));
-    qValue = _mm_shuffle_ps(complex1, complex2, _MM_SHUFFLE(3,1,3,1));
-    // Arctan to get phase:
-    phase = atan2f4(qValue, iValue);
-    // When Q = 0 and I < 0, atan2f4 sucks and returns 2pi vice pi.
-    // Compare to 2pi:
-    keepMask = _mm_cmpneq_ps(phase,testVector);
-    phase = _mm_and_ps(phase, keepMask);
-    mask = _mm_andnot_ps(keepMask, correctVector);
-    phase = _mm_or_ps(phase, mask);
-    // done with above correction.
-    phase = _mm_mul_ps(phase, vNormalizeFactor);
-    _mm_store_ps((float*)outPtr, phase);
-    outPtr += 4;
-  }
-  number = quarterPoints * 4;
-#endif /* LV_HAVE_SIMDMATH_H */
-
-  for (; number < num_points; number++) {
-    const float real = *complexVectorPtr++;
-    const float imag = *complexVectorPtr++;
-    *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief performs the atan2 on the input vector and stores the results in the 
output vector.
-  \param outputVector The vector where the results will be stored.
-  \param inputVector Input vector containing interleaved IQ data (I = cos, Q = 
sin).
-  \param normalizeFactor The atan2 results will be divided by this 
normalization factor.
-  \param num_points The number of complex values in the input vector.
-*/
-static inline void volk_32fc_s32f_atan2_32f_generic(float* outputVector, const 
lv_32fc_t* inputVector, const float normalizeFactor, unsigned int num_points){
-  float* outPtr = outputVector;
-  const float* inPtr = (float*)inputVector;
-  const float invNormalizeFactor = 1.0 / normalizeFactor;
-  unsigned int number;
-  for ( number = 0; number < num_points; number++) {
-    const float real = *inPtr++;
-    const float imag = *inPtr++;
-    *outPtr++ = atan2f(imag, real) * invNormalizeFactor;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_s32f_atan2_32f_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h 
b/volk/kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h
deleted file mode 100644
index 0f24512..0000000
--- a/volk/kernels/volk/volk_32fc_s32f_deinterleave_real_16i.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
-#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Deinterleaves the complex vector, multiply the value by the scalar, 
convert to 16t, and in I vector data
-  \param complexVector The complex input vector
-  \param scalar The value to be multiply against each of the input values
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_s32f_deinterleave_real_16i_a_sse(int16_t* 
iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int 
num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* complexVectorPtr = (float*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-
-  __m128 cplxValue1, cplxValue2, iValue;
-
-  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    cplxValue1 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    cplxValue2 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    // Arrange in i1i2i3i4 format
-    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-
-    iValue = _mm_mul_ps(iValue, vScalar);
-
-    _mm_store_ps(floatBuffer, iValue);
-    *iBufferPtr++ = (int16_t)(floatBuffer[0]);
-    *iBufferPtr++ = (int16_t)(floatBuffer[1]);
-    *iBufferPtr++ = (int16_t)(floatBuffer[2]);
-    *iBufferPtr++ = (int16_t)(floatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  iBufferPtr = &iBuffer[number];
-  for(; number < num_points; number++){
-    *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex vector, multiply the value by the scalar, 
convert to 16t, and in I vector data
-  \param complexVector The complex input vector
-  \param scalar The value to be multiply against each of the input values
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_32fc_s32f_deinterleave_real_16i_generic(int16_t* 
iBuffer, const lv_32fc_t* complexVector, const float scalar, unsigned int 
num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  unsigned int number = 0;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = (int16_t)(*complexVectorPtr++ * scalar);
-    complexVectorPtr++;
-  }
-
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32f_magnitude_16i.h 
b/volk/kernels/volk/volk_32fc_s32f_magnitude_16i.h
deleted file mode 100644
index b8ac93a..0000000
--- a/volk/kernels/volk/volk_32fc_s32f_magnitude_16i.h
+++ /dev/null
@@ -1,181 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
-#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-/*!
-  \brief Calculates the magnitude of the complexVector, scales the resulting 
value and stores the results in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param scalar The scale value multiplied to the magnitude of each complex 
vector
-  \param magnitudeVector The vector containing the real output values
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_32fc_s32f_magnitude_16i_a_sse3(int16_t* 
magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned 
int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* complexVectorPtr = (const float*)complexVector;
-  int16_t* magnitudeVectorPtr = magnitudeVector;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-
-  __m128 cplxValue1, cplxValue2, result;
-
-  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    cplxValue1 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    cplxValue2 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
-    cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
-
-    result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
-
-    result = _mm_sqrt_ps(result);
-
-    result = _mm_mul_ps(result, vScalar);
-
-    _mm_store_ps(floatBuffer, result);
-    *magnitudeVectorPtr++ = (int16_t)(floatBuffer[0]);
-    *magnitudeVectorPtr++ = (int16_t)(floatBuffer[1]);
-    *magnitudeVectorPtr++ = (int16_t)(floatBuffer[2]);
-    *magnitudeVectorPtr++ = (int16_t)(floatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  magnitudeVectorPtr = &magnitudeVector[number];
-  for(; number < num_points; number++){
-    float val1Real = *complexVectorPtr++;
-    float val1Imag = *complexVectorPtr++;
-    *magnitudeVectorPtr++ = (int16_t)(sqrtf((val1Real * val1Real) + (val1Imag 
* val1Imag)) * scalar);
-  }
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Calculates the magnitude of the complexVector, scales the resulting 
value and stores the results in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param scalar The scale value multiplied to the magnitude of each complex 
vector
-  \param magnitudeVector The vector containing the real output values
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_32fc_s32f_magnitude_16i_a_sse(int16_t* 
magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned 
int num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  const float* complexVectorPtr = (const float*)complexVector;
-  int16_t* magnitudeVectorPtr = magnitudeVector;
-
-  __m128 vScalar = _mm_set_ps1(scalar);
-
-  __m128 cplxValue1, cplxValue2, iValue, qValue, result;
-
-  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    cplxValue1 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    cplxValue2 = _mm_load_ps(complexVectorPtr);
-    complexVectorPtr += 4;
-
-    // Arrange in i1i2i3i4 format
-    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-    // Arrange in q1q2q3q4 format
-    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-    iValue = _mm_mul_ps(iValue, iValue); // Square the I values
-    qValue = _mm_mul_ps(qValue, qValue); // Square the Q Values
-
-    result = _mm_add_ps(iValue, qValue); // Add the I2 and Q2 values
-
-    result = _mm_sqrt_ps(result);
-
-    result = _mm_mul_ps(result, vScalar);
-
-    _mm_store_ps(floatBuffer, result);
-    *magnitudeVectorPtr++ = (int16_t)(floatBuffer[0]);
-    *magnitudeVectorPtr++ = (int16_t)(floatBuffer[1]);
-    *magnitudeVectorPtr++ = (int16_t)(floatBuffer[2]);
-    *magnitudeVectorPtr++ = (int16_t)(floatBuffer[3]);
-  }
-
-  number = quarterPoints * 4;
-  magnitudeVectorPtr = &magnitudeVector[number];
-  for(; number < num_points; number++){
-    float val1Real = *complexVectorPtr++;
-    float val1Imag = *complexVectorPtr++;
-    *magnitudeVectorPtr++ = (int16_t)(sqrtf((val1Real * val1Real) + (val1Imag 
* val1Imag)) * scalar);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Calculates the magnitude of the complexVector, scales the resulting 
value and stores the results in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param scalar The scale value multiplied to the magnitude of each complex 
vector
-  \param magnitudeVector The vector containing the real output values
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-static inline void volk_32fc_s32f_magnitude_16i_generic(int16_t* 
magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned 
int num_points){
-  const float* complexVectorPtr = (float*)complexVector;
-  int16_t* magnitudeVectorPtr = magnitudeVector;
-  unsigned int number = 0;
-  for(number = 0; number < num_points; number++){
-    const float real = *complexVectorPtr++;
-    const float imag = *complexVectorPtr++;
-    *magnitudeVectorPtr++ = (int16_t)(sqrtf((real*real) + (imag*imag)) * 
scalar);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Calculates the magnitude of the complexVector, scales the resulting 
value and stores the results in the magnitudeVector
-  \param complexVector The vector containing the complex input values
-  \param scalar The scale value multiplied to the magnitude of each complex 
vector
-  \param magnitudeVector The vector containing the real output values
-  \param num_points The number of complex values in complexVector to be 
calculated and stored into cVector
-*/
-extern void volk_32fc_s32f_magnitude_16i_a_orc_impl(int16_t* magnitudeVector, 
const lv_32fc_t* complexVector, const float scalar, unsigned int num_points);
-static inline void volk_32fc_s32f_magnitude_16i_u_orc(int16_t* 
magnitudeVector, const lv_32fc_t* complexVector, const float scalar, unsigned 
int num_points){
-    volk_32fc_s32f_magnitude_16i_a_orc_impl(magnitudeVector, complexVector, 
scalar, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32fc_s32f_magnitude_16i_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32f_power_32fc.h 
b/volk/kernels/volk/volk_32fc_s32f_power_32fc.h
deleted file mode 100644
index 9ecca50..0000000
--- a/volk/kernels/volk/volk_32fc_s32f_power_32fc.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32f_power_32fc_a_H
-#define INCLUDED_volk_32fc_s32f_power_32fc_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-//! raise a complex float to a real float power
-static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp, 
const float power){
-    const float arg = power*atan2f(lv_creal(exp), lv_cimag(exp));
-    const float mag = powf(lv_creal(exp)*lv_creal(exp) + 
lv_cimag(exp)*lv_cimag(exp), power/2);
-    return mag*lv_cmake(-cosf(arg), sinf(arg));
-}
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-/*!
-  \brief Takes each the input complex vector value to the specified power and 
stores the results in the return vector
-  \param cVector The vector where the results will be stored
-  \param aVector The complex vector of values to be taken to a power
-  \param power The power value to be applied to each data point
-  \param num_points The number of values in aVector to be taken to the 
specified power level and stored into cVector
-*/
-static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float power, unsigned int num_points){
-  unsigned int number = 0;
-
-  lv_32fc_t* cPtr = cVector;
-  const lv_32fc_t* aPtr = aVector;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 vPower = _mm_set_ps1(power);
-
-  __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue;
-  for(;number < quarterPoints; number++){
-
-    cplxValue1 = _mm_load_ps((float*)aPtr);
-    aPtr += 2;
-
-    cplxValue2 = _mm_load_ps((float*)aPtr);
-    aPtr += 2;
-
-    // Convert to polar coordinates
-
-    // Arrange in i1i2i3i4 format
-    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-    // Arrange in q1q2q3q4 format
-    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-    phase = atan2f4(qValue, iValue); // Calculate the Phase
-
-    magnitude = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(iValue, iValue), 
_mm_mul_ps(qValue, qValue))); // Calculate the magnitude by square rooting the 
added I2 and Q2 values
-
-    // Now calculate the power of the polar coordinate data
-    magnitude = powf4(magnitude, vPower); // Take the magnitude to the 
specified power
-
-    phase = _mm_mul_ps(phase, vPower); // Multiply the phase by the specified 
power
-
-    // Convert back to cartesian coordinates
-    iValue = _mm_mul_ps( cosf4(phase), magnitude); // Multiply the cos of the 
phase by the magnitude
-    qValue = _mm_mul_ps( sinf4(phase), magnitude); // Multiply the sin of the 
phase by the magnitude
-
-    cplxValue1 = _mm_unpacklo_ps(iValue, qValue); // Interleave the lower two 
i & q values
-    cplxValue2 = _mm_unpackhi_ps(iValue, qValue); // Interleave the upper two 
i & q values
-
-    _mm_store_ps((float*)cPtr,cplxValue1); // Store the results back into the 
C container
-
-    cPtr += 2;
-
-    _mm_store_ps((float*)cPtr,cplxValue2); // Store the results back into the 
C container
-
-    cPtr += 2;
-  }
-
-  number = quarterPoints * 4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-  for(;number < num_points; number++){
-    *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Takes each the input complex vector value to the specified power 
and stores the results in the return vector
-    \param cVector The vector where the results will be stored
-    \param aVector The complex vector of values to be taken to a power
-    \param power The power value to be applied to each data point
-    \param num_points The number of values in aVector to be taken to the 
specified power level and stored into cVector
-  */
-static inline void volk_32fc_s32f_power_32fc_generic(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const float power, unsigned int num_points){
-  lv_32fc_t* cPtr = cVector;
-  const lv_32fc_t* aPtr = aVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_s32f_power_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h 
b/volk/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h
deleted file mode 100644
index 8927112..0000000
--- a/volk/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
-#define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-/*!
-  \brief Calculates the log10 power value for each input point
-  \param logPowerOutput The 10.0 * log10(r*r + i*i) for each data point
-  \param complexFFTInput The complex data output from the FFT point
-  \param normalizationFactor This value is divided against all the input 
values before the power is calculated
-  \param num_points The number of fft data points
-*/
-static inline void volk_32fc_s32f_power_spectrum_32f_a_sse3(float* 
logPowerOutput, const lv_32fc_t* complexFFTInput, const float 
normalizationFactor, unsigned int num_points){
-  const float* inputPtr = (const float*)complexFFTInput;
-  float* destPtr = logPowerOutput;
-  uint64_t number = 0;
-  const float iNormalizationFactor = 1.0 / normalizationFactor;
-#ifdef LV_HAVE_LIB_SIMDMATH
-  __m128 magScalar = _mm_set_ps1(10.0);
-  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
-
-  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
-
-  __m128 power;
-  __m128 input1, input2;
-  const uint64_t quarterPoints = num_points / 4;
-  for(;number < quarterPoints; number++){
-    // Load the complex values
-    input1 =_mm_load_ps(inputPtr);
-    inputPtr += 4;
-    input2 =_mm_load_ps(inputPtr);
-    inputPtr += 4;
-
-    // Apply the normalization factor
-    input1 = _mm_mul_ps(input1, invNormalizationFactor);
-    input2 = _mm_mul_ps(input2, invNormalizationFactor);
-
-    // Multiply each value by itself
-    // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
-    input1 = _mm_mul_ps(input1, input1);
-    // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
-    input2 = _mm_mul_ps(input2, input2);
-
-    // Horizontal add, to add (r*r) + (i*i) for each complex value
-    // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
-    power = _mm_hadd_ps(input1, input2);
-
-    // Calculate the natural log power
-    power = logf4(power);
-
-    // Convert to log10 and multiply by 10.0
-    power = _mm_mul_ps(power, magScalar);
-
-    // Store the floating point results
-    _mm_store_ps(destPtr, power);
-
-    destPtr += 4;
-  }
-
-  number = quarterPoints*4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-  // Calculate the FFT for any remaining points
-
-  for(; number < num_points; number++){
-    // Calculate dBm
-    // 50 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
-    // 75 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-
-    const float real = *inputPtr++ * iNormalizationFactor;
-    const float imag = *inputPtr++ * iNormalizationFactor;
-
-    *destPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
-
-    destPtr++;
-  }
-
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Calculates the log10 power value for each input point
-  \param logPowerOutput The 10.0 * log10(r*r + i*i) for each data point
-  \param complexFFTInput The complex data output from the FFT point
-  \param normalizationFactor This value is divided agains all the input values 
before the power is calculated
-  \param num_points The number of fft data points
-*/
-static inline void volk_32fc_s32f_power_spectrum_32f_generic(float* 
logPowerOutput, const lv_32fc_t* complexFFTInput, const float 
normalizationFactor, unsigned int num_points){
-  // Calculate the Power of the complex point
-  const float* inputPtr = (float*)complexFFTInput;
-  float* realFFTDataPointsPtr = logPowerOutput;
-  const float iNormalizationFactor = 1.0 / normalizationFactor;
-  unsigned int point;
-  for(point = 0; point < num_points; point++){
-    // Calculate dBm
-    // 50 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
-    // 75 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-
-    const float real = *inputPtr++ * iNormalizationFactor;
-    const float imag = *inputPtr++ * iNormalizationFactor;
-
-    *realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 
1e-20);
-
-
-    realFFTDataPointsPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h 
b/volk/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h
deleted file mode 100644
index 517ade4..0000000
--- a/volk/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h
+++ /dev/null
@@ -1,248 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
-#define INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <math.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-/*!
-  \brief Calculates the log10 power value divided by the RBW for each input 
point
-  \param logPowerOutput The 10.0 * log10((r*r + i*i)/RBW) for each data point
-  \param complexFFTInput The complex data output from the FFT point
-  \param normalizationFactor This value is divided against all the input 
values before the power is calculated
-  \param rbw The resolution bandwith of the fft spectrum
-  \param num_points The number of fft data points
-*/
-static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float* 
logPowerOutput, const lv_32fc_t* complexFFTInput, const float 
normalizationFactor, const float rbw, unsigned int num_points){
-  const float* inputPtr = (const float*)complexFFTInput;
-  float* destPtr = logPowerOutput;
-  uint64_t number = 0;
-  const float iRBW = 1.0 / rbw;
-  const float iNormalizationFactor = 1.0 / normalizationFactor;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-  __m256 magScalar = _mm256_set1_ps(10.0);
-  magScalar = _mm256_div_ps(magScalar, logf4(magScalar));
-
-  __m256 invRBW = _mm256_set1_ps(iRBW);
-
-  __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor);
-
-  __m256 power;
-  __m256 input1, input2;
-  const uint64_t eighthPoints = num_points / 8;
-  for(;number < eighthPoints; number++){
-    // Load the complex values
-    input1 =_mm256_load_ps(inputPtr);
-    inputPtr += 8;
-    input2 =_mm256_load_ps(inputPtr);
-    inputPtr += 8;
-
-    // Apply the normalization factor
-    input1 = _mm256_mul_ps(input1, invNormalizationFactor);
-    input2 = _mm256_mul_ps(input2, invNormalizationFactor);
-
-    // Multiply each value by itself
-    // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
-    input1 = _mm256_mul_ps(input1, input1);
-    // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
-    input2 = _mm256_mul_ps(input2, input2);
-
-    // Horizontal add, to add (r*r) + (i*i) for each complex value
-    // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
-    inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20);
-    inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31);
-
-    power = _mm256_hadd_ps(inputVal1, inputVal2);
-
-    // Divide by the rbw
-    power = _mm256_mul_ps(power, invRBW);
-
-    // Calculate the natural log power
-    power = logf4(power);
-
-    // Convert to log10 and multiply by 10.0
-    power = _mm256_mul_ps(power, magScalar);
-
-    // Store the floating point results
-    _mm256_store_ps(destPtr, power);
-
-    destPtr += 8;
-  }
-
-  number = eighthPoints*8;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-  // Calculate the FFT for any remaining points
-  for(; number < num_points; number++){
-    // Calculate dBm
-    // 50 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
-    // 75 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-
-    const float real = *inputPtr++ * iNormalizationFactor;
-    const float imag = *inputPtr++ * iNormalizationFactor;
-
-    *destPtr = 10.0*log10f((((real * real) + (imag * imag)) + 1e-20) * iRBW);
-    destPtr++;
-  }
-
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-/*!
-  \brief Calculates the log10 power value divided by the RBW for each input 
point
-  \param logPowerOutput The 10.0 * log10((r*r + i*i)/RBW) for each data point
-  \param complexFFTInput The complex data output from the FFT point
-  \param normalizationFactor This value is divided against all the input 
values before the power is calculated
-  \param rbw The resolution bandwith of the fft spectrum
-  \param num_points The number of fft data points
-*/
-static inline void volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* 
logPowerOutput, const lv_32fc_t* complexFFTInput, const float 
normalizationFactor, const float rbw, unsigned int num_points){
-  const float* inputPtr = (const float*)complexFFTInput;
-  float* destPtr = logPowerOutput;
-  uint64_t number = 0;
-  const float iRBW = 1.0 / rbw;
-  const float iNormalizationFactor = 1.0 / normalizationFactor;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-  __m128 magScalar = _mm_set_ps1(10.0);
-  magScalar = _mm_div_ps(magScalar, logf4(magScalar));
-
-  __m128 invRBW = _mm_set_ps1(iRBW);
-
-  __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
-
-  __m128 power;
-  __m128 input1, input2;
-  const uint64_t quarterPoints = num_points / 4;
-  for(;number < quarterPoints; number++){
-    // Load the complex values
-    input1 =_mm_load_ps(inputPtr);
-    inputPtr += 4;
-    input2 =_mm_load_ps(inputPtr);
-    inputPtr += 4;
-
-    // Apply the normalization factor
-    input1 = _mm_mul_ps(input1, invNormalizationFactor);
-    input2 = _mm_mul_ps(input2, invNormalizationFactor);
-
-    // Multiply each value by itself
-    // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
-    input1 = _mm_mul_ps(input1, input1);
-    // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
-    input2 = _mm_mul_ps(input2, input2);
-
-    // Horizontal add, to add (r*r) + (i*i) for each complex value
-    // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
-    power = _mm_hadd_ps(input1, input2);
-
-    // Divide by the rbw
-    power = _mm_mul_ps(power, invRBW);
-
-    // Calculate the natural log power
-    power = logf4(power);
-
-    // Convert to log10 and multiply by 10.0
-    power = _mm_mul_ps(power, magScalar);
-
-    // Store the floating point results
-    _mm_store_ps(destPtr, power);
-
-    destPtr += 4;
-  }
-
-  number = quarterPoints*4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-  // Calculate the FFT for any remaining points
-  for(; number < num_points; number++){
-    // Calculate dBm
-    // 50 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
-    // 75 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-
-    const float real = *inputPtr++ * iNormalizationFactor;
-    const float imag = *inputPtr++ * iNormalizationFactor;
-
-    *destPtr = 10.0*log10f((((real * real) + (imag * imag)) + 1e-20) * iRBW);
-    destPtr++;
-  }
-
-}
-#endif /* LV_HAVE_SSE3 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Calculates the log10 power value divided by the RBW for each input 
point
-  \param logPowerOutput The 10.0 * log10((r*r + i*i)/RBW) for each data point
-  \param complexFFTInput The complex data output from the FFT point
-  \param normalizationFactor This value is divided against all the input 
values before the power is calculated
-  \param rbw The resolution bandwith of the fft spectrum
-  \param num_points The number of fft data points
-*/
-static inline void volk_32fc_s32f_x2_power_spectral_density_32f_generic(float* 
logPowerOutput, const lv_32fc_t* complexFFTInput, const float 
normalizationFactor, const float rbw, unsigned int num_points){
-  // Calculate the Power of the complex point
-  const float* inputPtr = (float*)complexFFTInput;
-  float* realFFTDataPointsPtr = logPowerOutput;
-  unsigned int point;
-  const float invRBW = 1.0 / rbw;
-  const float iNormalizationFactor = 1.0 / normalizationFactor;
-
-  for(point = 0; point < num_points; point++){
-    // Calculate dBm
-    // 50 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
-    // 75 ohm load assumption
-    // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-
-    const float real = *inputPtr++ * iNormalizationFactor;
-    const float imag = *inputPtr++ * iNormalizationFactor;
-
-    *realFFTDataPointsPtr = 10.0*log10f((((real * real) + (imag * imag)) + 
1e-20) * invRBW);
-
-    realFFTDataPointsPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32fc_s32f_x2_power_spectral_density_32f_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h 
b/volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
deleted file mode 100644
index 474b982..0000000
--- a/volk/kernels/volk/volk_32fc_s32fc_multiply_32fc.h
+++ /dev/null
@@ -1,325 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H
-#define INCLUDED_volk_32fc_s32fc_multiply_32fc_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-#include <float.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_s32fc_multiply_32fc_u_avx(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-    unsigned int number = 0;
-    unsigned int i = 0;
-    const unsigned int quarterPoints = num_points / 4;
-    unsigned int isodd = num_points & 3;
-    __m256 x, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    // Set up constant scalar vector
-    yl = _mm256_set1_ps(lv_creal(scalar));
-    yh = _mm256_set1_ps(lv_cimag(scalar));
-
-    for(;number < quarterPoints; number++){
-      x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi as 
ar,ai,br,bi
-
-      tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-      x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-      tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, 
br*dr-bi*di, bi*dr+br*di
-
-      _mm256_storeu_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 4;
-      c += 4;
-    }
-
-    for(i = num_points-isodd; i < num_points; i++) {
-        *c++ = (*a++) * scalar;
-    }
-
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-/*!
-  \brief Multiplies the input vector by a scalar and stores the results in the 
third vector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be multiplied
-  \param scalar The complex scalar to multiply aVector
-  \param num_points The number of complex values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32fc_s32fc_multiply_32fc_u_sse3(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-  unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    __m128 x, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    // Set up constant scalar vector
-    yl = _mm_set_ps1(lv_creal(scalar));
-    yh = _mm_set_ps1(lv_cimag(scalar));
-
-    for(;number < halfPoints; number++){
-
-      x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-
-      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-      _mm_storeu_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 2;
-      c += 2;
-    }
-
-    if((num_points % 2) != 0) {
-      *c = (*a) * scalar;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Multiplies the input vector by a scalar and stores the results in the 
third vector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be multiplied
-  \param scalar The complex scalar to multiply aVector
-  \param num_points The number of complex values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_32fc_s32fc_multiply_32fc_generic(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    unsigned int number = num_points;
-
-    // unwrap loop
-    while (number >= 8){
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      number -= 8;
-    }
-
-    // clean up any remaining
-    while (number-- > 0)
-      *cPtr++ = *aPtr++ * scalar;
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_u_H */
-#ifndef INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H
-#define INCLUDED_volk_32fc_s32fc_multiply_32fc_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-#include <float.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_s32fc_multiply_32fc_a_avx(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-    unsigned int number = 0;
-    unsigned int i = 0;
-    const unsigned int quarterPoints = num_points / 4;
-    unsigned int isodd = num_points & 3;
-    __m256 x, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    // Set up constant scalar vector
-    yl = _mm256_set1_ps(lv_creal(scalar));
-    yh = _mm256_set1_ps(lv_cimag(scalar));
-
-    for(;number < quarterPoints; number++){
-      x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi as 
ar,ai,br,bi
-
-      tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-      x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-      tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, 
br*dr-bi*di, bi*dr+br*di
-
-      _mm256_store_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 4;
-      c += 4;
-    }
-
-    for(i = num_points-isodd; i < num_points; i++) {
-        *c++ = (*a++) * scalar;
-    }
-
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_s32fc_multiply_32fc_a_sse3(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-  unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    __m128 x, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-
-    // Set up constant scalar vector
-    yl = _mm_set_ps1(lv_creal(scalar));
-    yh = _mm_set_ps1(lv_cimag(scalar));
-
-    for(;number < halfPoints; number++){
-
-      x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-
-      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-      _mm_store_ps((float*)c,z); // Store the results back into the C container
-
-      a += 2;
-      c += 2;
-    }
-
-    if((num_points % 2) != 0) {
-      *c = (*a) * scalar;
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_s32fc_multiply_32fc_neon(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    unsigned int number = num_points;
-    unsigned int quarter_points = num_points / 4;
-
-    float32x4x2_t a_val, scalar_val;
-    float32x4x2_t tmp_imag;
-
-    scalar_val = vld2q_f32((const float*)&scalar);
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32((float*)aPtr);
-        tmp_imag.val[1] = vmulq_f32(a_val.val[1], scalar_val.val[0]);
-        tmp_imag.val[0] = vmulq_f32(a_val.val[0], scalar_val.val[0]);
-
-        tmp_imag.val[1] = vmlaq_f32(tmp_imag.val[1], a_val.val[0], 
scalar_val.val[1]);
-        tmp_imag.val[0] = vmlaq_f32(tmp_imag.val[0], a_val.val[1], 
scalar_val.val[1]);
-
-        vst2q_f32((float*)cVector, tmp_imag);
-        aPtr += 4;
-        cVector += 4;
-    }
-
-    for(number = quarter_points*4; number < num_points; number++){
-      *cPtr++ = *aPtr++ * scalar;
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_s32fc_multiply_32fc_a_generic(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t scalar, unsigned int num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    unsigned int number = num_points;
-
-    // unwrap loop
-    while (number >= 8){
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      *cPtr++ = (*aPtr++) * scalar;
-      number -= 8;
-    }
-
-    // clean up any remaining
-    while (number-- > 0)
-      *cPtr++ = *aPtr++ * scalar;
-}
-#endif /* LV_HAVE_GENERIC */
-
-#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h 
b/volk/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h
deleted file mode 100644
index cbbc436..0000000
--- a/volk/kernels/volk/volk_32fc_s32fc_rotatorpuppet_32fc.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32fc_rotatorpuppet_32fc_a_H
-#define INCLUDED_volk_32fc_s32fc_rotatorpuppet_32fc_a_H
-
-
-#include <volk/volk_complex.h>
-#include <stdio.h>
-#include <volk/volk_32fc_s32fc_x2_rotator_32fc.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-/*!
-  \brief rotate input vector at fixed rate per sample from initial phase offset
-  \param outVector The vector where the results will be stored
-  \param inVector Vector to be rotated
-  \param phase_inc rotational velocity
-  \param phase initial phase offset
-  \param num_points The number of values in inVector to be rotated and stored 
into cVector
-*/
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_generic(lv_32fc_t* 
outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, unsigned int 
num_points){
-    lv_32fc_t phase[1] = {lv_cmake(.3, 0.95393)};
-    volk_32fc_s32fc_x2_rotator_32fc_generic(outVector, inVector, phase_inc, 
phase, num_points);
-
-}
-
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_sse4_1(lv_32fc_t* 
outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, unsigned int 
num_points){
-    lv_32fc_t phase[1] = {lv_cmake(.3, .95393)};
-    volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(outVector, inVector, phase_inc, 
phase, num_points);
-
-}
-
-#endif /* LV_HAVE_SSE4_1 */
-
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_sse4_1(lv_32fc_t* 
outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, unsigned int 
num_points){
-    lv_32fc_t phase[1] = {lv_cmake(.3, .95393)};
-    volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(outVector, inVector, phase_inc, 
phase, num_points);
-
-}
-
-#endif /* LV_HAVE_SSE4_1 */
-
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-
-/*!
-  \brief rotate input vector at fixed rate per sample from initial phase offset
-  \param outVector The vector where the results will be stored
-  \param inVector Vector to be rotated
-  \param phase_inc rotational velocity
-  \param phase initial phase offset
-  \param num_points The number of values in inVector to be rotated and stored 
into cVector
-*/
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_a_avx(lv_32fc_t* 
outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, unsigned int 
num_points){
-    lv_32fc_t phase[1] = {lv_cmake(.3, .95393)};
-    volk_32fc_s32fc_x2_rotator_32fc_a_avx(outVector, inVector, phase_inc, 
phase, num_points);
-}
-
-#endif /* LV_HAVE_AVX */
-
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-
-static inline void volk_32fc_s32fc_rotatorpuppet_32fc_u_avx(lv_32fc_t* 
outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, unsigned int 
num_points){
-    lv_32fc_t phase[1] = {lv_cmake(.3, .95393)};
-    volk_32fc_s32fc_x2_rotator_32fc_u_avx(outVector, inVector, phase_inc, 
phase, num_points);
-}
-
-#endif /* LV_HAVE_AVX */
-
-#endif /* INCLUDED_volk_32fc_s32fc_rotatorpuppet_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h 
b/volk/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
deleted file mode 100644
index 0650078..0000000
--- a/volk/kernels/volk/volk_32fc_s32fc_x2_rotator_32fc.h
+++ /dev/null
@@ -1,484 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
-#define INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H
-
-
-#include <volk/volk_complex.h>
-#include <stdio.h>
-#include <stdlib.h>
-#define ROTATOR_RELOAD 512
-
-
-#ifdef LV_HAVE_GENERIC
-
-/*!
-  \brief rotate input vector at fixed rate per sample from initial phase offset
-  \param outVector The vector where the results will be stored
-  \param inVector Vector to be rotated
-  \param phase_inc rotational velocity
-  \param phase initial phase offset
-  \param num_points The number of values in inVector to be rotated and stored 
into cVector
-*/
-static inline void volk_32fc_s32fc_x2_rotator_32fc_generic(lv_32fc_t* 
outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* 
phase, unsigned int num_points){
-    unsigned int i = 0;
-    int j = 0;
-    for(i = 0; i < (unsigned int)(num_points/ROTATOR_RELOAD); ++i) {
-        for(j = 0; j < ROTATOR_RELOAD; ++j) {
-            *outVector++ = *inVector++ * (*phase);
-            (*phase) *= phase_inc;
-        }
-#ifdef __cplusplus
-        (*phase) /= std::abs((*phase));
-#else
-        (*phase) /= cabsf((*phase));
-#endif
-    }
-    for(i = 0; i < num_points%ROTATOR_RELOAD; ++i) {
-        *outVector++ = *inVector++ * (*phase);
-        (*phase) *= phase_inc;
-    }
-
-}
-
-#endif /* LV_HAVE_GENERIC */
-
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-static inline void volk_32fc_s32fc_x2_rotator_32fc_a_sse4_1(lv_32fc_t* 
outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* 
phase, unsigned int num_points){
-    lv_32fc_t* cPtr = outVector;
-    const lv_32fc_t* aPtr = inVector;
-    lv_32fc_t incr = 1;
-    lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};
-
-    unsigned int i, j = 0;
-
-    for(i = 0; i < 2; ++i) {
-        phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
-    }
-
-    /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
-    printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
-    printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
-    __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, 
tmp2p;
-
-    phase_Val = _mm_loadu_ps((float*)phase_Ptr);
-    inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), 
lv_creal(incr));
-
-    const unsigned int halfPoints = num_points / 2;
-
-
-    for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
-        for(j = 0; j < ROTATOR_RELOAD; ++j) {
-
-            aVal = _mm_load_ps((float*)aPtr);
-
-            yl = _mm_moveldup_ps(phase_Val);
-            yh = _mm_movehdup_ps(phase_Val);
-            ylp = _mm_moveldup_ps(inc_Val);
-            yhp = _mm_movehdup_ps(inc_Val);
-
-            tmp1 = _mm_mul_ps(aVal, yl);
-            tmp1p = _mm_mul_ps(phase_Val, ylp);
-
-            aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
-            phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
-            tmp2 = _mm_mul_ps(aVal, yh);
-            tmp2p = _mm_mul_ps(phase_Val, yhp);
-
-            z = _mm_addsub_ps(tmp1, tmp2);
-            phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
-
-            _mm_store_ps((float*)cPtr, z);
-
-            aPtr += 2;
-            cPtr += 2;
-        }
-        tmp1 = _mm_mul_ps(phase_Val, phase_Val);
-        tmp2 = _mm_hadd_ps(tmp1, tmp1);
-        tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
-        tmp2 = _mm_sqrt_ps(tmp1);
-        phase_Val = _mm_div_ps(phase_Val, tmp2);
-    }
-    for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
-        aVal = _mm_load_ps((float*)aPtr);
-
-        yl = _mm_moveldup_ps(phase_Val);
-        yh = _mm_movehdup_ps(phase_Val);
-        ylp = _mm_moveldup_ps(inc_Val);
-        yhp = _mm_movehdup_ps(inc_Val);
-
-        tmp1 = _mm_mul_ps(aVal, yl);
-
-        tmp1p = _mm_mul_ps(phase_Val, ylp);
-
-        aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
-        phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
-        tmp2 = _mm_mul_ps(aVal, yh);
-        tmp2p = _mm_mul_ps(phase_Val, yhp);
-
-        z = _mm_addsub_ps(tmp1, tmp2);
-        phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
-
-        _mm_store_ps((float*)cPtr, z);
-
-        aPtr += 2;
-        cPtr += 2;
-    }
-
-    _mm_storeu_ps((float*)phase_Ptr, phase_Val);
-    for(i = 0; i < num_points%2; ++i) {
-        *cPtr++ = *aPtr++ * phase_Ptr[0];
-        phase_Ptr[0] *= (phase_inc);
-    }
-
-    (*phase) = phase_Ptr[0];
-
-}
-
-#endif /* LV_HAVE_SSE4_1 for aligned */
-
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-/*!
-  \brief rotate input vector at fixed rate per sample from initial phase offset
-  \param outVector The vector where the results will be stored
-  \param inVector Vector to be rotated
-  \param phase_inc rotational velocity
-  \param phase initial phase offset
-  \param num_points The number of values in inVector to be rotated and stored 
into cVector
-*/
-static inline void volk_32fc_s32fc_x2_rotator_32fc_u_sse4_1(lv_32fc_t* 
outVector, const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* 
phase, unsigned int num_points){
-    lv_32fc_t* cPtr = outVector;
-    const lv_32fc_t* aPtr = inVector;
-    lv_32fc_t incr = 1;
-    lv_32fc_t phase_Ptr[2] = {(*phase), (*phase)};
-
-    unsigned int i, j = 0;
-
-    for(i = 0; i < 2; ++i) {
-        phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
-    }
-
-    /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
-    printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
-    printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
-    __m128 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, 
tmp2p;
-
-    phase_Val = _mm_loadu_ps((float*)phase_Ptr);
-    inc_Val = _mm_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), 
lv_creal(incr));
-
-    const unsigned int halfPoints = num_points / 2;
-
-
-    for(i = 0; i < (unsigned int)(halfPoints/ROTATOR_RELOAD); i++) {
-        for(j = 0; j < ROTATOR_RELOAD; ++j) {
-
-            aVal = _mm_loadu_ps((float*)aPtr);
-
-            yl = _mm_moveldup_ps(phase_Val);
-            yh = _mm_movehdup_ps(phase_Val);
-            ylp = _mm_moveldup_ps(inc_Val);
-            yhp = _mm_movehdup_ps(inc_Val);
-
-            tmp1 = _mm_mul_ps(aVal, yl);
-            tmp1p = _mm_mul_ps(phase_Val, ylp);
-
-            aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
-            phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
-            tmp2 = _mm_mul_ps(aVal, yh);
-            tmp2p = _mm_mul_ps(phase_Val, yhp);
-
-            z = _mm_addsub_ps(tmp1, tmp2);
-            phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
-
-            _mm_storeu_ps((float*)cPtr, z);
-
-            aPtr += 2;
-            cPtr += 2;
-        }
-        tmp1 = _mm_mul_ps(phase_Val, phase_Val);
-        tmp2 = _mm_hadd_ps(tmp1, tmp1);
-        tmp1 = _mm_shuffle_ps(tmp2, tmp2, 0xD8);
-        tmp2 = _mm_sqrt_ps(tmp1);
-        phase_Val = _mm_div_ps(phase_Val, tmp2);
-    }
-    for(i = 0; i < halfPoints%ROTATOR_RELOAD; ++i) {
-        aVal = _mm_loadu_ps((float*)aPtr);
-
-        yl = _mm_moveldup_ps(phase_Val);
-        yh = _mm_movehdup_ps(phase_Val);
-        ylp = _mm_moveldup_ps(inc_Val);
-        yhp = _mm_movehdup_ps(inc_Val);
-
-        tmp1 = _mm_mul_ps(aVal, yl);
-
-        tmp1p = _mm_mul_ps(phase_Val, ylp);
-
-        aVal = _mm_shuffle_ps(aVal, aVal, 0xB1);
-        phase_Val = _mm_shuffle_ps(phase_Val, phase_Val, 0xB1);
-        tmp2 = _mm_mul_ps(aVal, yh);
-        tmp2p = _mm_mul_ps(phase_Val, yhp);
-
-        z = _mm_addsub_ps(tmp1, tmp2);
-        phase_Val = _mm_addsub_ps(tmp1p, tmp2p);
-
-        _mm_storeu_ps((float*)cPtr, z);
-
-        aPtr += 2;
-        cPtr += 2;
-    }
-
-    _mm_storeu_ps((float*)phase_Ptr, phase_Val);
-    for(i = 0; i < num_points%2; ++i) {
-        *cPtr++ = *aPtr++ * phase_Ptr[0];
-        phase_Ptr[0] *= (phase_inc);
-    }
-
-    (*phase) = phase_Ptr[0];
-
-}
-
-#endif /* LV_HAVE_SSE4_1 */
-
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-
-/*!
-  \brief rotate input vector at fixed rate per sample from initial phase offset
-  \param outVector The vector where the results will be stored
-  \param inVector Vector to be rotated
-  \param phase_inc rotational velocity
-  \param phase initial phase offset
-  \param num_points The number of values in inVector to be rotated and stored 
into cVector
-*/
-static inline void volk_32fc_s32fc_x2_rotator_32fc_a_avx(lv_32fc_t* outVector, 
const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, 
unsigned int num_points){
-    lv_32fc_t* cPtr = outVector;
-    const lv_32fc_t* aPtr = inVector;
-    lv_32fc_t incr = 1;
-    lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};
-
-    unsigned int i, j = 0;
-
-    for(i = 0; i < 4; ++i) {
-        phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
-    }
-
-    /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
-    printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
-    printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
-    printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
-    printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
-    __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, 
tmp2p;
-
-    phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
-    inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), 
lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
-    const unsigned int fourthPoints = num_points / 4;
-
-
-    for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
-        for(j = 0; j < ROTATOR_RELOAD; ++j) {
-
-            aVal = _mm256_load_ps((float*)aPtr);
-
-            yl = _mm256_moveldup_ps(phase_Val);
-            yh = _mm256_movehdup_ps(phase_Val);
-            ylp = _mm256_moveldup_ps(inc_Val);
-            yhp = _mm256_movehdup_ps(inc_Val);
-
-            tmp1 = _mm256_mul_ps(aVal, yl);
-            tmp1p = _mm256_mul_ps(phase_Val, ylp);
-
-            aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
-            phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
-            tmp2 = _mm256_mul_ps(aVal, yh);
-            tmp2p = _mm256_mul_ps(phase_Val, yhp);
-
-            z = _mm256_addsub_ps(tmp1, tmp2);
-            phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
-
-            _mm256_store_ps((float*)cPtr, z);
-
-            aPtr += 4;
-            cPtr += 4;
-        }
-        tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
-        tmp2 = _mm256_hadd_ps(tmp1, tmp1);
-        tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
-        tmp2 = _mm256_sqrt_ps(tmp1);
-        phase_Val = _mm256_div_ps(phase_Val, tmp2);
-    }
-    for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
-        aVal = _mm256_load_ps((float*)aPtr);
-
-        yl = _mm256_moveldup_ps(phase_Val);
-        yh = _mm256_movehdup_ps(phase_Val);
-        ylp = _mm256_moveldup_ps(inc_Val);
-        yhp = _mm256_movehdup_ps(inc_Val);
-
-        tmp1 = _mm256_mul_ps(aVal, yl);
-
-        tmp1p = _mm256_mul_ps(phase_Val, ylp);
-
-        aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
-        phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
-        tmp2 = _mm256_mul_ps(aVal, yh);
-        tmp2p = _mm256_mul_ps(phase_Val, yhp);
-
-        z = _mm256_addsub_ps(tmp1, tmp2);
-        phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
-
-        _mm256_store_ps((float*)cPtr, z);
-
-        aPtr += 4;
-        cPtr += 4;
-    }
-
-    _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
-    for(i = 0; i < num_points%4; ++i) {
-        *cPtr++ = *aPtr++ * phase_Ptr[0];
-        phase_Ptr[0] *= (phase_inc);
-    }
-
-    (*phase) = phase_Ptr[0];
-
-}
-
-#endif /* LV_HAVE_AVX for aligned */
-
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-
-/*!
-  \brief rotate input vector at fixed rate per sample from initial phase offset
-  \param outVector The vector where the results will be stored
-  \param inVector Vector to be rotated
-  \param phase_inc rotational velocity
-  \param phase initial phase offset
-  \param num_points The number of values in inVector to be rotated and stored 
into cVector
-*/
-static inline void volk_32fc_s32fc_x2_rotator_32fc_u_avx(lv_32fc_t* outVector, 
const lv_32fc_t* inVector, const lv_32fc_t phase_inc, lv_32fc_t* phase, 
unsigned int num_points){
-    lv_32fc_t* cPtr = outVector;
-    const lv_32fc_t* aPtr = inVector;
-    lv_32fc_t incr = 1;
-    lv_32fc_t phase_Ptr[4] = {(*phase), (*phase), (*phase), (*phase)};
-
-    unsigned int i, j = 0;
-
-    for(i = 0; i < 4; ++i) {
-        phase_Ptr[i] *= incr;
-        incr *= (phase_inc);
-    }
-
-    /*printf("%f, %f\n", lv_creal(phase_Ptr[0]), lv_cimag(phase_Ptr[0]));
-    printf("%f, %f\n", lv_creal(phase_Ptr[1]), lv_cimag(phase_Ptr[1]));
-    printf("%f, %f\n", lv_creal(phase_Ptr[2]), lv_cimag(phase_Ptr[2]));
-    printf("%f, %f\n", lv_creal(phase_Ptr[3]), lv_cimag(phase_Ptr[3]));
-    printf("incr: %f, %f\n", lv_creal(incr), lv_cimag(incr));*/
-    __m256 aVal, phase_Val, inc_Val, yl, yh, tmp1, tmp2, z, ylp, yhp, tmp1p, 
tmp2p;
-
-    phase_Val = _mm256_loadu_ps((float*)phase_Ptr);
-    inc_Val = _mm256_set_ps(lv_cimag(incr), lv_creal(incr),lv_cimag(incr), 
lv_creal(incr),lv_cimag(incr), lv_creal(incr),lv_cimag(incr), lv_creal(incr));
-    const unsigned int fourthPoints = num_points / 4;
-
-
-    for(i = 0; i < (unsigned int)(fourthPoints/ROTATOR_RELOAD); i++) {
-        for(j = 0; j < ROTATOR_RELOAD; ++j) {
-
-            aVal = _mm256_loadu_ps((float*)aPtr);
-
-            yl = _mm256_moveldup_ps(phase_Val);
-            yh = _mm256_movehdup_ps(phase_Val);
-            ylp = _mm256_moveldup_ps(inc_Val);
-            yhp = _mm256_movehdup_ps(inc_Val);
-
-            tmp1 = _mm256_mul_ps(aVal, yl);
-            tmp1p = _mm256_mul_ps(phase_Val, ylp);
-
-            aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
-            phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
-            tmp2 = _mm256_mul_ps(aVal, yh);
-            tmp2p = _mm256_mul_ps(phase_Val, yhp);
-
-            z = _mm256_addsub_ps(tmp1, tmp2);
-            phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
-
-            _mm256_storeu_ps((float*)cPtr, z);
-
-            aPtr += 4;
-            cPtr += 4;
-        }
-        tmp1 = _mm256_mul_ps(phase_Val, phase_Val);
-        tmp2 = _mm256_hadd_ps(tmp1, tmp1);
-        tmp1 = _mm256_shuffle_ps(tmp2, tmp2, 0xD8);
-        tmp2 = _mm256_sqrt_ps(tmp1);
-        phase_Val = _mm256_div_ps(phase_Val, tmp2);
-    }
-    for(i = 0; i < fourthPoints%ROTATOR_RELOAD; ++i) {
-        aVal = _mm256_loadu_ps((float*)aPtr);
-
-        yl = _mm256_moveldup_ps(phase_Val);
-        yh = _mm256_movehdup_ps(phase_Val);
-        ylp = _mm256_moveldup_ps(inc_Val);
-        yhp = _mm256_movehdup_ps(inc_Val);
-
-        tmp1 = _mm256_mul_ps(aVal, yl);
-
-        tmp1p = _mm256_mul_ps(phase_Val, ylp);
-
-        aVal = _mm256_shuffle_ps(aVal, aVal, 0xB1);
-        phase_Val = _mm256_shuffle_ps(phase_Val, phase_Val, 0xB1);
-        tmp2 = _mm256_mul_ps(aVal, yh);
-        tmp2p = _mm256_mul_ps(phase_Val, yhp);
-
-        z = _mm256_addsub_ps(tmp1, tmp2);
-        phase_Val = _mm256_addsub_ps(tmp1p, tmp2p);
-
-        _mm256_storeu_ps((float*)cPtr, z);
-
-        aPtr += 4;
-        cPtr += 4;
-    }
-
-    _mm256_storeu_ps((float*)phase_Ptr, phase_Val);
-    for(i = 0; i < num_points%4; ++i) {
-        *cPtr++ = *aPtr++ * phase_Ptr[0];
-        phase_Ptr[0] *= (phase_inc);
-    }
-
-    (*phase) = phase_Ptr[0];
-
-}
-
-#endif /* LV_HAVE_AVX */
-
-#endif /* INCLUDED_volk_32fc_s32fc_rotator_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h 
b/volk/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h
deleted file mode 100644
index 8964434..0000000
--- a/volk/kernels/volk/volk_32fc_x2_conjugate_dot_prod_32fc.h
+++ /dev/null
@@ -1,569 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_u_H
-#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_u_H
-
-
-#include<volk/volk_complex.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_generic(lv_32fc_t* 
result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) 
{
-
-  const unsigned int num_bytes = num_points*8;
-
-  float * res = (float*) result;
-  float * in = (float*) input;
-  float * tp = (float*) taps;
-  unsigned int n_2_ccomplex_blocks = num_bytes >> 4;
-  unsigned int isodd = (num_bytes >> 3) &1;
-
-
-
-  float sum0[2] = {0,0};
-  float sum1[2] = {0,0};
-  unsigned int i = 0;
-
-
-  for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-
-    sum0[0] += in[0] * tp[0] + in[1] * tp[1];
-    sum0[1] += (-in[0] * tp[1]) + in[1] * tp[0];
-    sum1[0] += in[2] * tp[2] + in[3] * tp[3];
-    sum1[1] += (-in[2] * tp[3]) + in[3] * tp[2];
-
-
-    in += 4;
-    tp += 4;
-
-  }
-
-
-  res[0] = sum0[0] + sum1[0];
-  res[1] = sum0[1] + sum1[1];
-
-
-
-  for(i = 0; i < isodd; ++i) {
-
-
-    *result += input[(num_bytes >> 3) - 1] * lv_conj(taps[(num_bytes >> 3) - 
1]);
-
-  }
-  /*
-  for(i = 0; i < num_bytes >> 3; ++i) {
-    *result += input[i] * conjf(taps[i]);
-  }
-  */
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-#ifdef LV_HAVE_SSE3
-
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-#include <mmintrin.h>
-
-
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_u_sse3(lv_32fc_t* 
result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) 
{
-
-  unsigned int num_bytes = num_points*8;
-
-  // Variable never used?
-  //__VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 
0x80000000, 0x00000000, 0x80000000};
-
-  union HalfMask {
-    uint32_t intRep[4];
-    __m128 vec;
-    } halfMask;
-
-  union NegMask {
-    int intRep[4];
-    __m128 vec;
-  } negMask;
-
-  unsigned int offset = 0;
-  float Rsum=0, Isum=0;
-  float Im,Re;
-
-  __m128 in1, in2, Rv, fehg, Iv, Rs, Ivm, Is;
-  __m128 zv = {0,0,0,0};
-
-  halfMask.intRep[0] = halfMask.intRep[1] = 0xFFFFFFFF;
-  halfMask.intRep[2] = halfMask.intRep[3] = 0x00000000;
-
-  negMask.intRep[0] = negMask.intRep[2] = 0x80000000;
-  negMask.intRep[1] = negMask.intRep[3] = 0;
-
-  // main loop
-  while(num_bytes >= 4*sizeof(float)){
-
-    in1 = _mm_loadu_ps( (float*) (input+offset) );
-    in2 = _mm_loadu_ps( (float*) (taps+offset) );
-    Rv = _mm_mul_ps(in1, in2);
-    fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1));
-    Iv = _mm_mul_ps(in1, fehg);
-    Rs = _mm_hadd_ps( _mm_hadd_ps(Rv, zv) ,zv);
-    Ivm = _mm_xor_ps( negMask.vec, Iv );
-    Is = _mm_hadd_ps( _mm_hadd_ps(Ivm, zv) ,zv);
-    _mm_store_ss( &Im, Is );
-    _mm_store_ss( &Re, Rs );
-    num_bytes -= 4*sizeof(float);
-    offset += 2;
-    Rsum += Re;
-    Isum += Im;
-  }
-
-  // handle the last complex case ...
-  if(num_bytes > 0){
-
-    if(num_bytes != 4){
-      // bad things are happening
-    }
-
-    in1 = _mm_loadu_ps( (float*) (input+offset) );
-    in2 = _mm_loadu_ps( (float*) (taps+offset) );
-    Rv = _mm_and_ps(_mm_mul_ps(in1, in2), halfMask.vec);
-    fehg = _mm_shuffle_ps(in2, in2, _MM_SHUFFLE(2,3,0,1));
-    Iv = _mm_and_ps(_mm_mul_ps(in1, fehg), halfMask.vec);
-    Rs = _mm_hadd_ps(_mm_hadd_ps(Rv, zv),zv);
-    Ivm = _mm_xor_ps( negMask.vec, Iv );
-    Is = _mm_hadd_ps(_mm_hadd_ps(Ivm, zv),zv);
-    _mm_store_ss( &Im, Is );
-    _mm_store_ss( &Re, Rs );
-    Rsum += Re;
-    Isum += Im;
-  }
-
-  result[0] = lv_cmake(Rsum,Isum);
-  return;
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_neon(lv_32fc_t* 
result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) 
{
-
-    unsigned int quarter_points = num_points / 4;
-    unsigned int number;
-
-    lv_32fc_t* a_ptr = (lv_32fc_t*) taps;
-    lv_32fc_t* b_ptr = (lv_32fc_t*) input;
-    // for 2-lane vectors, 1st lane holds the real part,
-    // 2nd lane holds the imaginary part
-    float32x4x2_t a_val, b_val, accumulator;
-    float32x4x2_t tmp_imag;
-    accumulator.val[0] = vdupq_n_f32(0);
-    accumulator.val[1] = vdupq_n_f32(0);
-
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32((float*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
-        b_val = vld2q_f32((float*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
-        __builtin_prefetch(a_ptr+8);
-        __builtin_prefetch(b_ptr+8);
-
-        // do the first multiply
-        tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]);
-        tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]);
-
-        // use multiply accumulate/subtract to get result
-        tmp_imag.val[1] = vmlsq_f32(tmp_imag.val[1], a_val.val[0], 
b_val.val[1]);
-        tmp_imag.val[0] = vmlaq_f32(tmp_imag.val[0], a_val.val[1], 
b_val.val[1]);
-
-        accumulator.val[0] = vaddq_f32(accumulator.val[0], tmp_imag.val[0]);
-        accumulator.val[1] = vaddq_f32(accumulator.val[1], tmp_imag.val[1]);
-
-        // increment pointers
-        a_ptr += 4;
-        b_ptr += 4;
-    }
-    lv_32fc_t accum_result[4];
-    vst2q_f32((float*)accum_result, accumulator);
-    *result = accum_result[0] + accum_result[1] + accum_result[2] + 
accum_result[3];
-
-    // tail case
-    for(number = quarter_points*4; number < num_points; ++number) {
-      *result += (*a_ptr++) * lv_conj(*b_ptr++);
-    }
-    *result = lv_conj(*result);
-
-}
-#endif /*LV_HAVE_NEON*/
-
-#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_u_H*/
-
-#ifndef INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H
-#define INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H
-
-#include <volk/volk_common.h>
-#include<volk/volk_complex.h>
-#include<stdio.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_generic(lv_32fc_t* 
result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) 
{
-
-  const unsigned int num_bytes = num_points*8;
-
-  float * res = (float*) result;
-  float * in = (float*) input;
-  float * tp = (float*) taps;
-  unsigned int n_2_ccomplex_blocks = num_bytes >> 4;
-  unsigned int isodd = (num_bytes >> 3) &1;
-
-
-
-  float sum0[2] = {0,0};
-  float sum1[2] = {0,0};
-  unsigned int i = 0;
-
-
-  for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-
-
-    sum0[0] += in[0] * tp[0] + in[1] * tp[1];
-    sum0[1] += (-in[0] * tp[1]) + in[1] * tp[0];
-    sum1[0] += in[2] * tp[2] + in[3] * tp[3];
-    sum1[1] += (-in[2] * tp[3]) + in[3] * tp[2];
-
-
-    in += 4;
-    tp += 4;
-
-  }
-
-
-  res[0] = sum0[0] + sum1[0];
-  res[1] = sum0[1] + sum1[1];
-
-
-
-  for(i = 0; i < isodd; ++i) {
-
-
-    *result += input[(num_bytes >> 3) - 1] * lv_conj(taps[(num_bytes >> 3) - 
1]);
-
-  }
-  /*
-  for(i = 0; i < num_bytes >> 3; ++i) {
-    *result += input[i] * conjf(taps[i]);
-  }
-  */
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse(lv_32fc_t* 
result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) 
{
-
-  const unsigned int num_bytes = num_points*8;
-
-  __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 
0x80000000, 0x00000000, 0x80000000};
-
-
-
-
-  asm volatile
-    (
-     "#  ccomplex_conjugate_dotprod_generic (float* result, const float 
*input,\n\t"
-     "#                         const float *taps, unsigned num_bytes)\n\t"
-     "#    float sum0 = 0;\n\t"
-     "#    float sum1 = 0;\n\t"
-     "#    float sum2 = 0;\n\t"
-     "#    float sum3 = 0;\n\t"
-     "#    do {\n\t"
-     "#      sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
-     "#      sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
-     "#      sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
-     "#      sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
-     "#      input += 4;\n\t"
-     "#      taps += 4;  \n\t"
-     "#    } while (--n_2_ccomplex_blocks != 0);\n\t"
-     "#    result[0] = sum0 + sum2;\n\t"
-     "#    result[1] = sum1 + sum3;\n\t"
-     "# TODO: prefetch and better scheduling\n\t"
-     "  xor    %%r9,  %%r9\n\t"
-     "  xor    %%r10, %%r10\n\t"
-     "  movq   %[conjugator], %%r9\n\t"
-     "  movq   %%rcx, %%rax\n\t"
-     "  movaps 0(%%r9), %%xmm8\n\t"
-     "  movq   %%rcx, %%r8\n\t"
-     "  movq   %[rsi],  %%r9\n\t"
-     "  movq   %[rdx], %%r10\n\t"
-     " xorps   %%xmm6, %%xmm6          # zero accumulators\n\t"
-     " movaps  0(%%r9), %%xmm0\n\t"
-     " xorps   %%xmm7, %%xmm7          # zero accumulators\n\t"
-     " movups  0(%%r10), %%xmm2\n\t"
-     " shr     $5, %%rax               # rax = n_2_ccomplex_blocks / 2\n\t"
-     "  shr     $4, %%r8\n\t"
-     "  xorps  %%xmm8, %%xmm2\n\t"
-     " jmp     .%=L1_test\n\t"
-     " # 4 taps / loop\n\t"
-     " # something like ?? cycles / loop\n\t"
-     ".%=Loop1:        \n\t"
-     "# complex prod: C += A * B,  w/ temp Z & Y (or B), 
xmmPN=$0x8000000080000000\n\t"
-     "#        movaps  (%%r9), %%xmmA\n\t"
-     "#        movaps  (%%r10), %%xmmB\n\t"
-     "#        movaps  %%xmmA, %%xmmZ\n\t"
-     "#        shufps  $0xb1, %%xmmZ, %%xmmZ   # swap internals\n\t"
-     "#        mulps   %%xmmB, %%xmmA\n\t"
-     "#        mulps   %%xmmZ, %%xmmB\n\t"
-     "#        # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
-     "#        xorps   %%xmmPN, %%xmmA\n\t"
-     "#        movaps  %%xmmA, %%xmmZ\n\t"
-     "#        unpcklps %%xmmB, %%xmmA\n\t"
-     "#        unpckhps %%xmmB, %%xmmZ\n\t"
-     "#        movaps  %%xmmZ, %%xmmY\n\t"
-     "#        shufps  $0x44, %%xmmA, %%xmmZ   # b01000100\n\t"
-     "#        shufps  $0xee, %%xmmY, %%xmmA   # b11101110\n\t"
-     "#        addps   %%xmmZ, %%xmmA\n\t"
-     "#        addps   %%xmmA, %%xmmC\n\t"
-     "# A=xmm0, B=xmm2, Z=xmm4\n\t"
-     "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
-     " movaps  16(%%r9), %%xmm1\n\t"
-     " movaps  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " movaps  16(%%r10), %%xmm3\n\t"
-     " movaps  %%xmm1, %%xmm5\n\t"
-     "  xorps   %%xmm8, %%xmm3\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm3, %%xmm1\n\t"
-     " shufps  $0xb1, %%xmm5, %%xmm5   # swap internals\n\t"
-     " addps   %%xmm1, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " movaps  32(%%r9), %%xmm0\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     " mulps   %%xmm5, %%xmm3\n\t"
-     " add     $32, %%r9\n\t"
-     " movaps  32(%%r10), %%xmm2\n\t"
-     " addps   %%xmm3, %%xmm7\n\t"
-     " add     $32, %%r10\n\t"
-     "  xorps   %%xmm8, %%xmm2\n\t"
-     ".%=L1_test:\n\t"
-     " dec     %%rax\n\t"
-     " jge     .%=Loop1\n\t"
-     " # We've handled the bulk of multiplies up to here.\n\t"
-     " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
-     " # If so, we've got 2 more taps to do.\n\t"
-     " and     $1, %%r8\n\t"
-     " je      .%=Leven\n\t"
-     " # The count was odd, do 2 more taps.\n\t"
-     " # Note that we've already got mm0/mm2 preloaded\n\t"
-     " # from the main loop.\n\t"
-     " movaps  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     ".%=Leven:\n\t"
-     " # neg inversor\n\t"
-     " xorps   %%xmm1, %%xmm1\n\t"
-     " mov     $0x80000000, %%r9\n\t"
-     " movd    %%r9, %%xmm1\n\t"
-     " shufps  $0x11, %%xmm1, %%xmm1   # b00010001 # 0 -0 0 -0\n\t"
-     " # pfpnacc\n\t"
-     " xorps   %%xmm1, %%xmm6\n\t"
-     " movaps  %%xmm6, %%xmm2\n\t"
-     " unpcklps %%xmm7, %%xmm6\n\t"
-     " unpckhps %%xmm7, %%xmm2\n\t"
-     " movaps  %%xmm2, %%xmm3\n\t"
-     " shufps  $0x44, %%xmm6, %%xmm2   # b01000100\n\t"
-     " shufps  $0xee, %%xmm3, %%xmm6   # b11101110\n\t"
-     " addps   %%xmm2, %%xmm6\n\t"
-     "                                 # xmm6 = r1 i2 r3 i4\n\t"
-     " movhlps %%xmm6, %%xmm4          # xmm4 = r3 i4 ?? ??\n\t"
-     " addps   %%xmm4, %%xmm6          # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
-     " movlps  %%xmm6, (%[rdi])                # store low 2x32 bits (complex) 
to memory\n\t"
-     :
-     :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" 
(result), [conjugator] "r" (conjugator)
-     :"rax", "r8", "r9", "r10"
-     );
-
-
-  int getem = num_bytes % 16;
-
-
-  for(; getem > 0; getem -= 8) {
-
-
-    *result += (input[(num_bytes >> 3) - 1] * lv_conj(taps[(num_bytes >> 3) - 
1]));
-
-  }
-
-  return;
-}
-#endif
-
-#if LV_HAVE_SSE && LV_HAVE_32
-static inline void volk_32fc_x2_conjugate_dot_prod_32fc_a_sse_32(lv_32fc_t* 
result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) 
{
-
-  const unsigned int num_bytes = num_points*8;
-
-  __VOLK_ATTR_ALIGNED(16) static const uint32_t conjugator[4]= {0x00000000, 
0x80000000, 0x00000000, 0x80000000};
-
-  int bound = num_bytes >> 4;
-  int leftovers = num_bytes % 16;
-
-
-  asm volatile
-    (
-     " #pushl  %%ebp\n\t"
-     " #movl   %%esp, %%ebp\n\t"
-     " #movl   12(%%ebp), %%eax                # input\n\t"
-     " #movl   16(%%ebp), %%edx                # taps\n\t"
-     " #movl   20(%%ebp), %%ecx                # n_bytes\n\t"
-     "  movaps  0(%[conjugator]), %%xmm1\n\t"
-     " xorps   %%xmm6, %%xmm6          # zero accumulators\n\t"
-     " movaps  0(%[eax]), %%xmm0\n\t"
-     " xorps   %%xmm7, %%xmm7          # zero accumulators\n\t"
-     " movaps  0(%[edx]), %%xmm2\n\t"
-     "  movl    %[ecx], (%[out])\n\t"
-     " shrl    $5, %[ecx]              # ecx = n_2_ccomplex_blocks / 2\n\t"
-
-     "  xorps   %%xmm1, %%xmm2\n\t"
-     " jmp     .%=L1_test\n\t"
-     " # 4 taps / loop\n\t"
-     " # something like ?? cycles / loop\n\t"
-     ".%=Loop1:        \n\t"
-     "# complex prod: C += A * B,  w/ temp Z & Y (or B), 
xmmPN=$0x8000000080000000\n\t"
-     "#        movaps  (%[eax]), %%xmmA\n\t"
-     "#        movaps  (%[edx]), %%xmmB\n\t"
-     "#        movaps  %%xmmA, %%xmmZ\n\t"
-     "#        shufps  $0xb1, %%xmmZ, %%xmmZ   # swap internals\n\t"
-     "#        mulps   %%xmmB, %%xmmA\n\t"
-     "#        mulps   %%xmmZ, %%xmmB\n\t"
-     "#        # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
-     "#        xorps   %%xmmPN, %%xmmA\n\t"
-     "#        movaps  %%xmmA, %%xmmZ\n\t"
-     "#        unpcklps %%xmmB, %%xmmA\n\t"
-     "#        unpckhps %%xmmB, %%xmmZ\n\t"
-     "#        movaps  %%xmmZ, %%xmmY\n\t"
-     "#        shufps  $0x44, %%xmmA, %%xmmZ   # b01000100\n\t"
-     "#        shufps  $0xee, %%xmmY, %%xmmA   # b11101110\n\t"
-     "#        addps   %%xmmZ, %%xmmA\n\t"
-     "#        addps   %%xmmA, %%xmmC\n\t"
-     "# A=xmm0, B=xmm2, Z=xmm4\n\t"
-     "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
-     " movaps  16(%[edx]), %%xmm3\n\t"
-     " movaps  %%xmm0, %%xmm4\n\t"
-     "  xorps   %%xmm1, %%xmm3\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " movaps  16(%[eax]), %%xmm1\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " movaps  %%xmm1, %%xmm5\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm3, %%xmm1\n\t"
-     " shufps  $0xb1, %%xmm5, %%xmm5   # swap internals\n\t"
-     " addps   %%xmm1, %%xmm6\n\t"
-     "  movaps  0(%[conjugator]), %%xmm1\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " movaps  32(%[eax]), %%xmm0\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     " mulps   %%xmm5, %%xmm3\n\t"
-     " addl    $32, %[eax]\n\t"
-     " movaps  32(%[edx]), %%xmm2\n\t"
-     " addps   %%xmm3, %%xmm7\n\t"
-     "  xorps   %%xmm1, %%xmm2\n\t"
-     " addl    $32, %[edx]\n\t"
-     ".%=L1_test:\n\t"
-     " decl    %[ecx]\n\t"
-     " jge     .%=Loop1\n\t"
-     " # We've handled the bulk of multiplies up to here.\n\t"
-     " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
-     " # If so, we've got 2 more taps to do.\n\t"
-     " movl    0(%[out]), %[ecx]               # n_2_ccomplex_blocks\n\t"
-     "  shrl    $4, %[ecx]\n\t"
-     " andl    $1, %[ecx]\n\t"
-     " je      .%=Leven\n\t"
-     " # The count was odd, do 2 more taps.\n\t"
-     " # Note that we've already got mm0/mm2 preloaded\n\t"
-     " # from the main loop.\n\t"
-     " movaps  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     ".%=Leven:\n\t"
-     " # neg inversor\n\t"
-     "  #movl 8(%%ebp), %[eax] \n\t"
-     " xorps   %%xmm1, %%xmm1\n\t"
-     "  movl   $0x80000000, (%[out])\n\t"
-     " movss   (%[out]), %%xmm1\n\t"
-     " shufps  $0x11, %%xmm1, %%xmm1   # b00010001 # 0 -0 0 -0\n\t"
-     " # pfpnacc\n\t"
-     " xorps   %%xmm1, %%xmm6\n\t"
-     " movaps  %%xmm6, %%xmm2\n\t"
-     " unpcklps %%xmm7, %%xmm6\n\t"
-     " unpckhps %%xmm7, %%xmm2\n\t"
-     " movaps  %%xmm2, %%xmm3\n\t"
-     " shufps  $0x44, %%xmm6, %%xmm2   # b01000100\n\t"
-     " shufps  $0xee, %%xmm3, %%xmm6   # b11101110\n\t"
-     " addps   %%xmm2, %%xmm6\n\t"
-     "                                 # xmm6 = r1 i2 r3 i4\n\t"
-     " #movl   8(%%ebp), %[eax]                # @result\n\t"
-     " movhlps %%xmm6, %%xmm4          # xmm4 = r3 i4 ?? ??\n\t"
-     " addps   %%xmm4, %%xmm6          # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
-     " movlps  %%xmm6, (%[out])                # store low 2x32 bits (complex) 
to memory\n\t"
-     " #popl   %%ebp\n\t"
-     :
-     : [eax] "r" (input), [edx] "r" (taps), [ecx] "r" (num_bytes), [out] "r" 
(result), [conjugator] "r" (conjugator)
-     );
-
-
-
-
-  printf("%d, %d\n", leftovers, bound);
-
-  for(; leftovers > 0; leftovers -= 8) {
-
-
-    *result += (input[(bound << 1)] * lv_conj(taps[(bound << 1)]));
-
-  }
-
-  return;
-
-
-
-
-
-
-}
-
-#endif /*LV_HAVE_SSE*/
-
-
-
-#endif /*INCLUDED_volk_32fc_x2_conjugate_dot_prod_32fc_a_H*/
diff --git a/volk/kernels/volk/volk_32fc_x2_dot_prod_32fc.h 
b/volk/kernels/volk/volk_32fc_x2_dot_prod_32fc.h
deleted file mode 100644
index c65d098..0000000
--- a/volk/kernels/volk/volk_32fc_x2_dot_prod_32fc.h
+++ /dev/null
@@ -1,1116 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_u_H
-#define INCLUDED_volk_32fc_x2_dot_prod_32fc_u_H
-
-#include <volk/volk_common.h>
-#include <volk/volk_complex.h>
-#include <stdio.h>
-#include <string.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_32fc_x2_dot_prod_32fc_generic(lv_32fc_t* result, const 
lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  float * res = (float*) result;
-  float * in = (float*) input;
-  float * tp = (float*) taps;
-  unsigned int n_2_ccomplex_blocks = num_points/2;
-  unsigned int isodd = num_points & 1;
-
-  float sum0[2] = {0,0};
-  float sum1[2] = {0,0};
-  unsigned int i = 0;
-
-  for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-    sum0[0] += in[0] * tp[0] - in[1] * tp[1];
-    sum0[1] += in[0] * tp[1] + in[1] * tp[0];
-    sum1[0] += in[2] * tp[2] - in[3] * tp[3];
-    sum1[1] += in[2] * tp[3] + in[3] * tp[2];
-
-    in += 4;
-    tp += 4;
-  }
-
-  res[0] = sum0[0] + sum1[0];
-  res[1] = sum0[1] + sum1[1];
-
-  // Cleanup if we had an odd number of points
-  for(i = 0; i < isodd; ++i) {
-    *result += input[num_points - 1] * taps[num_points - 1];
-  }
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-static inline void volk_32fc_x2_dot_prod_32fc_u_sse_64(lv_32fc_t* result, 
const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-  unsigned int isodd = num_points & 1;
-
-  asm
-    (
-     "#  ccomplex_dotprod_generic (float* result, const float *input,\n\t"
-     "#                         const float *taps, unsigned num_bytes)\n\t"
-     "#    float sum0 = 0;\n\t"
-     "#    float sum1 = 0;\n\t"
-     "#    float sum2 = 0;\n\t"
-     "#    float sum3 = 0;\n\t"
-     "#    do {\n\t"
-     "#      sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
-     "#      sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
-     "#      sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
-     "#      sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
-     "#      input += 4;\n\t"
-     "#      taps += 4;  \n\t"
-     "#    } while (--n_2_ccomplex_blocks != 0);\n\t"
-     "#    result[0] = sum0 + sum2;\n\t"
-     "#    result[1] = sum1 + sum3;\n\t"
-     "# TODO: prefetch and better scheduling\n\t"
-     "  xor    %%r9,  %%r9\n\t"
-     "  xor    %%r10, %%r10\n\t"
-     "  movq   %%rcx, %%rax\n\t"
-     "  movq   %%rcx, %%r8\n\t"
-     "  movq   %[rsi],  %%r9\n\t"
-     "  movq   %[rdx], %%r10\n\t"
-     " xorps   %%xmm6, %%xmm6          # zero accumulators\n\t"
-     " movups  0(%%r9), %%xmm0\n\t"
-     " xorps   %%xmm7, %%xmm7          # zero accumulators\n\t"
-     " movups  0(%%r10), %%xmm2\n\t"
-     " shr     $5, %%rax               # rax = n_2_ccomplex_blocks / 2\n\t"
-     "  shr     $4, %%r8\n\t"
-     " jmp     .%=L1_test\n\t"
-     " # 4 taps / loop\n\t"
-     " # something like ?? cycles / loop\n\t"
-     ".%=Loop1:        \n\t"
-     "# complex prod: C += A * B,  w/ temp Z & Y (or B), 
xmmPN=$0x8000000080000000\n\t"
-     "#        movups  (%%r9), %%xmmA\n\t"
-     "#        movups  (%%r10), %%xmmB\n\t"
-     "#        movups  %%xmmA, %%xmmZ\n\t"
-     "#        shufps  $0xb1, %%xmmZ, %%xmmZ   # swap internals\n\t"
-     "#        mulps   %%xmmB, %%xmmA\n\t"
-     "#        mulps   %%xmmZ, %%xmmB\n\t"
-     "#        # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
-     "#        xorps   %%xmmPN, %%xmmA\n\t"
-     "#        movups  %%xmmA, %%xmmZ\n\t"
-     "#        unpcklps %%xmmB, %%xmmA\n\t"
-     "#        unpckhps %%xmmB, %%xmmZ\n\t"
-     "#        movups  %%xmmZ, %%xmmY\n\t"
-     "#        shufps  $0x44, %%xmmA, %%xmmZ   # b01000100\n\t"
-     "#        shufps  $0xee, %%xmmY, %%xmmA   # b11101110\n\t"
-     "#        addps   %%xmmZ, %%xmmA\n\t"
-     "#        addps   %%xmmA, %%xmmC\n\t"
-     "# A=xmm0, B=xmm2, Z=xmm4\n\t"
-     "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
-     " movups  16(%%r9), %%xmm1\n\t"
-     " movups  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " movups  16(%%r10), %%xmm3\n\t"
-     " movups  %%xmm1, %%xmm5\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm3, %%xmm1\n\t"
-     " shufps  $0xb1, %%xmm5, %%xmm5   # swap internals\n\t"
-     " addps   %%xmm1, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " movups  32(%%r9), %%xmm0\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     " mulps   %%xmm5, %%xmm3\n\t"
-     " add     $32, %%r9\n\t"
-     " movups  32(%%r10), %%xmm2\n\t"
-     " addps   %%xmm3, %%xmm7\n\t"
-     " add     $32, %%r10\n\t"
-     ".%=L1_test:\n\t"
-     " dec     %%rax\n\t"
-     " jge     .%=Loop1\n\t"
-     " # We've handled the bulk of multiplies up to here.\n\t"
-     " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
-     " # If so, we've got 2 more taps to do.\n\t"
-     " and     $1, %%r8\n\t"
-     " je      .%=Leven\n\t"
-     " # The count was odd, do 2 more taps.\n\t"
-     " # Note that we've already got mm0/mm2 preloaded\n\t"
-     " # from the main loop.\n\t"
-     " movups  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     ".%=Leven:\n\t"
-     " # neg inversor\n\t"
-     " xorps   %%xmm1, %%xmm1\n\t"
-     " mov     $0x80000000, %%r9\n\t"
-     " movd    %%r9, %%xmm1\n\t"
-     " shufps  $0x11, %%xmm1, %%xmm1   # b00010001 # 0 -0 0 -0\n\t"
-     " # pfpnacc\n\t"
-     " xorps   %%xmm1, %%xmm6\n\t"
-     " movups  %%xmm6, %%xmm2\n\t"
-     " unpcklps %%xmm7, %%xmm6\n\t"
-     " unpckhps %%xmm7, %%xmm2\n\t"
-     " movups  %%xmm2, %%xmm3\n\t"
-     " shufps  $0x44, %%xmm6, %%xmm2   # b01000100\n\t"
-     " shufps  $0xee, %%xmm3, %%xmm6   # b11101110\n\t"
-     " addps   %%xmm2, %%xmm6\n\t"
-     "                                 # xmm6 = r1 i2 r3 i4\n\t"
-     " movhlps %%xmm6, %%xmm4          # xmm4 = r3 i4 ?? ??\n\t"
-     " addps   %%xmm4, %%xmm6          # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
-     " movlps  %%xmm6, (%[rdi])                # store low 2x32 bits (complex) 
to memory\n\t"
-     :
-     :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
-     :"rax", "r8", "r9", "r10"
-     );
-
-
-  if(isodd) {
-    *result += input[num_points - 1] * taps[num_points - 1];
-  }
-
-  return;
-
-}
-
-#endif /* LV_HAVE_SSE && LV_HAVE_64 */
-
-
-
-
-#ifdef LV_HAVE_SSE3
-
-#include <pmmintrin.h>
-
-static inline void volk_32fc_x2_dot_prod_32fc_u_sse3(lv_32fc_t* result, const 
lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  lv_32fc_t dotProduct;
-  memset(&dotProduct, 0x0, 2*sizeof(float));
-
-  unsigned int number = 0;
-  const unsigned int halfPoints = num_points/2;
-  unsigned int isodd = num_points & 1;
-
-  __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
-
-  const lv_32fc_t* a = input;
-  const lv_32fc_t* b = taps;
-
-  dotProdVal = _mm_setzero_ps();
-
-  for(;number < halfPoints; number++){
-
-    x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-    y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-
-    yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-    yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-
-    tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-    x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-    tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-    z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-    dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication 
results together
-
-    a += 2;
-    b += 2;
-  }
-
-  __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
-
-  _mm_storeu_ps((float*)dotProductVector,dotProdVal); // Store the results 
back into the dot product vector
-
-  dotProduct += ( dotProductVector[0] + dotProductVector[1] );
-
-  if(isodd) {
-    dotProduct += input[num_points - 1] * taps[num_points - 1];
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-#ifdef LV_HAVE_SSE4_1
-
-#include <smmintrin.h>
-
-static inline void volk_32fc_x2_dot_prod_32fc_u_sse4_1(lv_32fc_t* result, 
const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  unsigned int i = 0;
-  const unsigned int qtr_points = num_points/4;
-  const unsigned int isodd = num_points & 3;
-
-  __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, 
im1;
-  float *p_input, *p_taps;
-  __m64 *p_result;
-
-  p_result = (__m64*)result;
-  p_input = (float*)input;
-  p_taps = (float*)taps;
-
-  static const __m128i neg = {0x000000000000000080000000};
-
-  real0 = _mm_setzero_ps();
-  real1 = _mm_setzero_ps();
-  im0 = _mm_setzero_ps();
-  im1 = _mm_setzero_ps();
-
-  for(; i < qtr_points; ++i) {
-    xmm0 = _mm_loadu_ps(p_input);
-    xmm1 = _mm_loadu_ps(p_taps);
-
-    p_input += 4;
-    p_taps += 4;
-
-    xmm2 = _mm_loadu_ps(p_input);
-    xmm3 = _mm_loadu_ps(p_taps);
-
-    p_input += 4;
-    p_taps += 4;
-
-    xmm4 = _mm_unpackhi_ps(xmm0, xmm2);
-    xmm5 = _mm_unpackhi_ps(xmm1, xmm3);
-    xmm0 = _mm_unpacklo_ps(xmm0, xmm2);
-    xmm2 = _mm_unpacklo_ps(xmm1, xmm3);
-
-    //imaginary vector from input
-    xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
-    //real vector from input
-    xmm3 = _mm_unpacklo_ps(xmm0, xmm4);
-    //imaginary vector from taps
-    xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
-    //real vector from taps
-    xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
-
-    xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1);
-    xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1);
-
-    xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2);
-    xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2);
-
-    real0 = _mm_add_ps(xmm4, real0);
-    real1 = _mm_add_ps(xmm5, real1);
-    im0 = _mm_add_ps(xmm6, im0);
-    im1 = _mm_add_ps(xmm7, im1);
-  }
-
-  real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec);
-
-  im0 = _mm_add_ps(im0, im1);
-  real0 = _mm_add_ps(real0, real1);
-
-  im0 = _mm_add_ps(im0, real0);
-
-  _mm_storel_pi(p_result, im0);
-
-  for(i = num_points-isodd; i < num_points; i++) {
-    *result += input[i] * taps[i];
-  }
-}
-
-#endif /*LV_HAVE_SSE4_1*/
-
-#ifdef LV_HAVE_AVX
-
-#include <immintrin.h>
-
-static inline void volk_32fc_x2_dot_prod_32fc_u_avx(lv_32fc_t* result, const 
lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  unsigned int isodd = num_points & 3;
-  unsigned int i = 0;
-  lv_32fc_t dotProduct;
-  memset(&dotProduct, 0x0, 2*sizeof(float));
-
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  __m256 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
-
-  const lv_32fc_t* a = input;
-  const lv_32fc_t* b = taps;
-
-  dotProdVal = _mm256_setzero_ps();
-
-  for(;number < quarterPoints; number++){
-    x = _mm256_loadu_ps((float*)a); // Load a,b,e,f as ar,ai,br,bi,er,ei,fr,fi
-    y = _mm256_loadu_ps((float*)b); // Load c,d,g,h as cr,ci,dr,di,gr,gi,hr,hi
-
-    yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr,gr,gr,hr,hr
-    yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di,gi,gi,hi,hi
-
-    tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr ...
-
-    x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be 
ai,ar,bi,br,ei,er,fi,fr
-
-    tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di ...
-
-    z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-    dotProdVal = _mm256_add_ps(dotProdVal, z); // Add the complex 
multiplication results together
-
-    a += 4;
-    b += 4;
-  }
-
-  __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
-
-  _mm256_storeu_ps((float*)dotProductVector,dotProdVal); // Store the results 
back into the dot product vector
-
-  dotProduct += ( dotProductVector[0] + dotProductVector[1] + 
dotProductVector[2] + dotProductVector[3]);
-
-  for(i = num_points-isodd; i < num_points; i++) {
-    dotProduct += input[i] * taps[i];
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_AVX*/
-
-
-#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_u_H*/
-
-#ifndef INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H
-#define INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H
-
-#include <volk/volk_common.h>
-#include <volk/volk_complex.h>
-#include <stdio.h>
-#include <string.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_32fc_x2_dot_prod_32fc_a_generic(lv_32fc_t* result, 
const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-
-  float * res = (float*) result;
-  float * in = (float*) input;
-  float * tp = (float*) taps;
-  unsigned int n_2_ccomplex_blocks = num_bytes >> 4;
-  unsigned int isodd = num_points & 1;
-
-  float sum0[2] = {0,0};
-  float sum1[2] = {0,0};
-  unsigned int i = 0;
-
-  for(i = 0; i < n_2_ccomplex_blocks; ++i) {
-    sum0[0] += in[0] * tp[0] - in[1] * tp[1];
-    sum0[1] += in[0] * tp[1] + in[1] * tp[0];
-    sum1[0] += in[2] * tp[2] - in[3] * tp[3];
-    sum1[1] += in[2] * tp[3] + in[3] * tp[2];
-
-    in += 4;
-    tp += 4;
-  }
-
-  res[0] = sum0[0] + sum1[0];
-  res[1] = sum0[1] + sum1[1];
-
-  for(i = 0; i < isodd; ++i) {
-    *result += input[num_points - 1] * taps[num_points - 1];
-  }
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-
-static inline void volk_32fc_x2_dot_prod_32fc_a_sse_64(lv_32fc_t* result, 
const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-  unsigned int isodd = num_points & 1;
-
-  asm
-    (
-     "#  ccomplex_dotprod_generic (float* result, const float *input,\n\t"
-     "#                         const float *taps, unsigned num_bytes)\n\t"
-     "#    float sum0 = 0;\n\t"
-     "#    float sum1 = 0;\n\t"
-     "#    float sum2 = 0;\n\t"
-     "#    float sum3 = 0;\n\t"
-     "#    do {\n\t"
-     "#      sum0 += input[0] * taps[0] - input[1] * taps[1];\n\t"
-     "#      sum1 += input[0] * taps[1] + input[1] * taps[0];\n\t"
-     "#      sum2 += input[2] * taps[2] - input[3] * taps[3];\n\t"
-     "#      sum3 += input[2] * taps[3] + input[3] * taps[2];\n\t"
-     "#      input += 4;\n\t"
-     "#      taps += 4;  \n\t"
-     "#    } while (--n_2_ccomplex_blocks != 0);\n\t"
-     "#    result[0] = sum0 + sum2;\n\t"
-     "#    result[1] = sum1 + sum3;\n\t"
-     "# TODO: prefetch and better scheduling\n\t"
-     "  xor    %%r9,  %%r9\n\t"
-     "  xor    %%r10, %%r10\n\t"
-     "  movq   %%rcx, %%rax\n\t"
-     "  movq   %%rcx, %%r8\n\t"
-     "  movq   %[rsi],  %%r9\n\t"
-     "  movq   %[rdx], %%r10\n\t"
-     " xorps   %%xmm6, %%xmm6          # zero accumulators\n\t"
-     " movaps  0(%%r9), %%xmm0\n\t"
-     " xorps   %%xmm7, %%xmm7          # zero accumulators\n\t"
-     " movaps  0(%%r10), %%xmm2\n\t"
-     " shr     $5, %%rax               # rax = n_2_ccomplex_blocks / 2\n\t"
-     "  shr     $4, %%r8\n\t"
-     " jmp     .%=L1_test\n\t"
-     " # 4 taps / loop\n\t"
-     " # something like ?? cycles / loop\n\t"
-     ".%=Loop1:        \n\t"
-     "# complex prod: C += A * B,  w/ temp Z & Y (or B), 
xmmPN=$0x8000000080000000\n\t"
-     "#        movaps  (%%r9), %%xmmA\n\t"
-     "#        movaps  (%%r10), %%xmmB\n\t"
-     "#        movaps  %%xmmA, %%xmmZ\n\t"
-     "#        shufps  $0xb1, %%xmmZ, %%xmmZ   # swap internals\n\t"
-     "#        mulps   %%xmmB, %%xmmA\n\t"
-     "#        mulps   %%xmmZ, %%xmmB\n\t"
-     "#        # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
-     "#        xorps   %%xmmPN, %%xmmA\n\t"
-     "#        movaps  %%xmmA, %%xmmZ\n\t"
-     "#        unpcklps %%xmmB, %%xmmA\n\t"
-     "#        unpckhps %%xmmB, %%xmmZ\n\t"
-     "#        movaps  %%xmmZ, %%xmmY\n\t"
-     "#        shufps  $0x44, %%xmmA, %%xmmZ   # b01000100\n\t"
-     "#        shufps  $0xee, %%xmmY, %%xmmA   # b11101110\n\t"
-     "#        addps   %%xmmZ, %%xmmA\n\t"
-     "#        addps   %%xmmA, %%xmmC\n\t"
-     "# A=xmm0, B=xmm2, Z=xmm4\n\t"
-     "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
-     " movaps  16(%%r9), %%xmm1\n\t"
-     " movaps  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " movaps  16(%%r10), %%xmm3\n\t"
-     " movaps  %%xmm1, %%xmm5\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm3, %%xmm1\n\t"
-     " shufps  $0xb1, %%xmm5, %%xmm5   # swap internals\n\t"
-     " addps   %%xmm1, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " movaps  32(%%r9), %%xmm0\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     " mulps   %%xmm5, %%xmm3\n\t"
-     " add     $32, %%r9\n\t"
-     " movaps  32(%%r10), %%xmm2\n\t"
-     " addps   %%xmm3, %%xmm7\n\t"
-     " add     $32, %%r10\n\t"
-     ".%=L1_test:\n\t"
-     " dec     %%rax\n\t"
-     " jge     .%=Loop1\n\t"
-     " # We've handled the bulk of multiplies up to here.\n\t"
-     " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
-     " # If so, we've got 2 more taps to do.\n\t"
-     " and     $1, %%r8\n\t"
-     " je      .%=Leven\n\t"
-     " # The count was odd, do 2 more taps.\n\t"
-     " # Note that we've already got mm0/mm2 preloaded\n\t"
-     " # from the main loop.\n\t"
-     " movaps  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     ".%=Leven:\n\t"
-     " # neg inversor\n\t"
-     " xorps   %%xmm1, %%xmm1\n\t"
-     " mov     $0x80000000, %%r9\n\t"
-     " movd    %%r9, %%xmm1\n\t"
-     " shufps  $0x11, %%xmm1, %%xmm1   # b00010001 # 0 -0 0 -0\n\t"
-     " # pfpnacc\n\t"
-     " xorps   %%xmm1, %%xmm6\n\t"
-     " movaps  %%xmm6, %%xmm2\n\t"
-     " unpcklps %%xmm7, %%xmm6\n\t"
-     " unpckhps %%xmm7, %%xmm2\n\t"
-     " movaps  %%xmm2, %%xmm3\n\t"
-     " shufps  $0x44, %%xmm6, %%xmm2   # b01000100\n\t"
-     " shufps  $0xee, %%xmm3, %%xmm6   # b11101110\n\t"
-     " addps   %%xmm2, %%xmm6\n\t"
-     "                                 # xmm6 = r1 i2 r3 i4\n\t"
-     " movhlps %%xmm6, %%xmm4          # xmm4 = r3 i4 ?? ??\n\t"
-     " addps   %%xmm4, %%xmm6          # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
-     " movlps  %%xmm6, (%[rdi])                # store low 2x32 bits (complex) 
to memory\n\t"
-     :
-     :[rsi] "r" (input), [rdx] "r" (taps), "c" (num_bytes), [rdi] "r" (result)
-     :"rax", "r8", "r9", "r10"
-     );
-
-
-  if(isodd) {
-    *result += input[num_points - 1] * taps[num_points - 1];
-  }
-
-  return;
-
-}
-
-#endif
-
-#if LV_HAVE_SSE && LV_HAVE_32
-
-static inline void volk_32fc_x2_dot_prod_32fc_a_sse_32(lv_32fc_t* result, 
const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  volk_32fc_x2_dot_prod_32fc_a_generic(result, input, taps, num_points);
-
-#if 0
-  const unsigned int num_bytes = num_points*8;
-  unsigned int isodd = num_points & 1;
-
-  asm volatile
-    (
-     " #pushl  %%ebp\n\t"
-     " #movl   %%esp, %%ebp\n\t"
-     " movl    12(%%ebp), %%eax                # input\n\t"
-     " movl    16(%%ebp), %%edx                # taps\n\t"
-     " movl    20(%%ebp), %%ecx                # n_bytes\n\t"
-     " xorps   %%xmm6, %%xmm6          # zero accumulators\n\t"
-     " movaps  0(%%eax), %%xmm0\n\t"
-     " xorps   %%xmm7, %%xmm7          # zero accumulators\n\t"
-     " movaps  0(%%edx), %%xmm2\n\t"
-     " shrl    $5, %%ecx               # ecx = n_2_ccomplex_blocks / 2\n\t"
-     " jmp     .%=L1_test\n\t"
-     " # 4 taps / loop\n\t"
-     " # something like ?? cycles / loop\n\t"
-     ".%=Loop1:        \n\t"
-     "# complex prod: C += A * B,  w/ temp Z & Y (or B), 
xmmPN=$0x8000000080000000\n\t"
-     "#        movaps  (%%eax), %%xmmA\n\t"
-     "#        movaps  (%%edx), %%xmmB\n\t"
-     "#        movaps  %%xmmA, %%xmmZ\n\t"
-     "#        shufps  $0xb1, %%xmmZ, %%xmmZ   # swap internals\n\t"
-     "#        mulps   %%xmmB, %%xmmA\n\t"
-     "#        mulps   %%xmmZ, %%xmmB\n\t"
-     "#        # SSE replacement for: pfpnacc %%xmmB, %%xmmA\n\t"
-     "#        xorps   %%xmmPN, %%xmmA\n\t"
-     "#        movaps  %%xmmA, %%xmmZ\n\t"
-     "#        unpcklps %%xmmB, %%xmmA\n\t"
-     "#        unpckhps %%xmmB, %%xmmZ\n\t"
-     "#        movaps  %%xmmZ, %%xmmY\n\t"
-     "#        shufps  $0x44, %%xmmA, %%xmmZ   # b01000100\n\t"
-     "#        shufps  $0xee, %%xmmY, %%xmmA   # b11101110\n\t"
-     "#        addps   %%xmmZ, %%xmmA\n\t"
-     "#        addps   %%xmmA, %%xmmC\n\t"
-     "# A=xmm0, B=xmm2, Z=xmm4\n\t"
-     "# A'=xmm1, B'=xmm3, Z'=xmm5\n\t"
-     " movaps  16(%%eax), %%xmm1\n\t"
-     " movaps  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " movaps  16(%%edx), %%xmm3\n\t"
-     " movaps  %%xmm1, %%xmm5\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm3, %%xmm1\n\t"
-     " shufps  $0xb1, %%xmm5, %%xmm5   # swap internals\n\t"
-     " addps   %%xmm1, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " movaps  32(%%eax), %%xmm0\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     " mulps   %%xmm5, %%xmm3\n\t"
-     " addl    $32, %%eax\n\t"
-     " movaps  32(%%edx), %%xmm2\n\t"
-     " addps   %%xmm3, %%xmm7\n\t"
-     " addl    $32, %%edx\n\t"
-     ".%=L1_test:\n\t"
-     " decl    %%ecx\n\t"
-     " jge     .%=Loop1\n\t"
-     " # We've handled the bulk of multiplies up to here.\n\t"
-     " # Let's sse if original n_2_ccomplex_blocks was odd.\n\t"
-     " # If so, we've got 2 more taps to do.\n\t"
-     " movl    20(%%ebp), %%ecx                # n_2_ccomplex_blocks\n\t"
-     "  shrl    $4, %%ecx\n\t"
-     " andl    $1, %%ecx\n\t"
-     " je      .%=Leven\n\t"
-     " # The count was odd, do 2 more taps.\n\t"
-     " # Note that we've already got mm0/mm2 preloaded\n\t"
-     " # from the main loop.\n\t"
-     " movaps  %%xmm0, %%xmm4\n\t"
-     " mulps   %%xmm2, %%xmm0\n\t"
-     " shufps  $0xb1, %%xmm4, %%xmm4   # swap internals\n\t"
-     " addps   %%xmm0, %%xmm6\n\t"
-     " mulps   %%xmm4, %%xmm2\n\t"
-     " addps   %%xmm2, %%xmm7\n\t"
-     ".%=Leven:\n\t"
-     " # neg inversor\n\t"
-     "  movl 8(%%ebp), %%eax \n\t"
-     " xorps   %%xmm1, %%xmm1\n\t"
-     "  movl   $0x80000000, (%%eax)\n\t"
-     " movss   (%%eax), %%xmm1\n\t"
-     " shufps  $0x11, %%xmm1, %%xmm1   # b00010001 # 0 -0 0 -0\n\t"
-     " # pfpnacc\n\t"
-     " xorps   %%xmm1, %%xmm6\n\t"
-     " movaps  %%xmm6, %%xmm2\n\t"
-     " unpcklps %%xmm7, %%xmm6\n\t"
-     " unpckhps %%xmm7, %%xmm2\n\t"
-     " movaps  %%xmm2, %%xmm3\n\t"
-     " shufps  $0x44, %%xmm6, %%xmm2   # b01000100\n\t"
-     " shufps  $0xee, %%xmm3, %%xmm6   # b11101110\n\t"
-     " addps   %%xmm2, %%xmm6\n\t"
-     "                                 # xmm6 = r1 i2 r3 i4\n\t"
-     " #movl   8(%%ebp), %%eax         # @result\n\t"
-     " movhlps %%xmm6, %%xmm4          # xmm4 = r3 i4 ?? ??\n\t"
-     " addps   %%xmm4, %%xmm6          # xmm6 = r1+r3 i2+i4 ?? ??\n\t"
-     " movlps  %%xmm6, (%%eax)         # store low 2x32 bits (complex) to 
memory\n\t"
-     " #popl   %%ebp\n\t"
-     :
-     :
-     : "eax", "ecx", "edx"
-     );
-
-
-  int getem = num_bytes % 16;
-
-  if(isodd) {
-    *result += (input[num_points - 1] * taps[num_points - 1]);
-  }
-
-  return;
-#endif
-}
-
-#endif /*LV_HAVE_SSE*/
-
-#ifdef LV_HAVE_SSE3
-
-#include <pmmintrin.h>
-
-static inline void volk_32fc_x2_dot_prod_32fc_a_sse3(lv_32fc_t* result, const 
lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-  unsigned int isodd = num_points & 1;
-
-  lv_32fc_t dotProduct;
-  memset(&dotProduct, 0x0, 2*sizeof(float));
-
-  unsigned int number = 0;
-  const unsigned int halfPoints = num_bytes >> 4;
-
-  __m128 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
-
-  const lv_32fc_t* a = input;
-  const lv_32fc_t* b = taps;
-
-  dotProdVal = _mm_setzero_ps();
-
-  for(;number < halfPoints; number++){
-
-    x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-    y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-
-    yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-    yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-
-    tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-    x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-    tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-    z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-    dotProdVal = _mm_add_ps(dotProdVal, z); // Add the complex multiplication 
results together
-
-    a += 2;
-    b += 2;
-  }
-
-  __VOLK_ATTR_ALIGNED(16) lv_32fc_t dotProductVector[2];
-
-  _mm_store_ps((float*)dotProductVector,dotProdVal); // Store the results back 
into the dot product vector
-
-  dotProduct += ( dotProductVector[0] + dotProductVector[1] );
-
-  if(isodd) {
-    dotProduct += input[num_points - 1] * taps[num_points - 1];
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-
-#ifdef LV_HAVE_SSE4_1
-
-#include <smmintrin.h>
-
-static inline void volk_32fc_x2_dot_prod_32fc_a_sse4_1(lv_32fc_t* result, 
const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  unsigned int i = 0;
-  const unsigned int qtr_points = num_points/4;
-  const unsigned int isodd = num_points & 3;
-
-  __m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, real0, real1, im0, 
im1;
-  float *p_input, *p_taps;
-  __m64 *p_result;
-
-  static const __m128i neg = {0x000000000000000080000000};
-
-  p_result = (__m64*)result;
-  p_input = (float*)input;
-  p_taps = (float*)taps;
-
-  real0 = _mm_setzero_ps();
-  real1 = _mm_setzero_ps();
-  im0 = _mm_setzero_ps();
-  im1 = _mm_setzero_ps();
-
-  for(; i < qtr_points; ++i) {
-    xmm0 = _mm_load_ps(p_input);
-    xmm1 = _mm_load_ps(p_taps);
-
-    p_input += 4;
-    p_taps += 4;
-
-    xmm2 = _mm_load_ps(p_input);
-    xmm3 = _mm_load_ps(p_taps);
-
-    p_input += 4;
-    p_taps += 4;
-
-    xmm4 = _mm_unpackhi_ps(xmm0, xmm2);
-    xmm5 = _mm_unpackhi_ps(xmm1, xmm3);
-    xmm0 = _mm_unpacklo_ps(xmm0, xmm2);
-    xmm2 = _mm_unpacklo_ps(xmm1, xmm3);
-
-    //imaginary vector from input
-    xmm1 = _mm_unpackhi_ps(xmm0, xmm4);
-    //real vector from input
-    xmm3 = _mm_unpacklo_ps(xmm0, xmm4);
-    //imaginary vector from taps
-    xmm0 = _mm_unpackhi_ps(xmm2, xmm5);
-    //real vector from taps
-    xmm2 = _mm_unpacklo_ps(xmm2, xmm5);
-
-    xmm4 = _mm_dp_ps(xmm3, xmm2, 0xf1);
-    xmm5 = _mm_dp_ps(xmm1, xmm0, 0xf1);
-
-    xmm6 = _mm_dp_ps(xmm3, xmm0, 0xf2);
-    xmm7 = _mm_dp_ps(xmm1, xmm2, 0xf2);
-
-    real0 = _mm_add_ps(xmm4, real0);
-    real1 = _mm_add_ps(xmm5, real1);
-    im0 = _mm_add_ps(xmm6, im0);
-    im1 = _mm_add_ps(xmm7, im1);
-  }
-
-  real1 = _mm_xor_ps(real1, bit128_p(&neg)->float_vec);
-
-  im0 = _mm_add_ps(im0, im1);
-  real0 = _mm_add_ps(real0, real1);
-
-  im0 = _mm_add_ps(im0, real0);
-
-  _mm_storel_pi(p_result, im0);
-
-  for(i = num_points-isodd; i < num_points; i++) {
-    *result += input[i] * taps[i];
-  }
-}
-
-#endif /*LV_HAVE_SSE4_1*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-static inline void volk_32fc_x2_dot_prod_32fc_neon(lv_32fc_t* result, const 
lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-    unsigned int quarter_points = num_points / 4;
-    unsigned int number;
-
-    lv_32fc_t* a_ptr = (lv_32fc_t*) taps;
-    lv_32fc_t* b_ptr = (lv_32fc_t*) input;
-    // for 2-lane vectors, 1st lane holds the real part,
-    // 2nd lane holds the imaginary part
-    float32x4x2_t a_val, b_val, c_val, accumulator;
-    float32x4x2_t tmp_real, tmp_imag;
-    accumulator.val[0] = vdupq_n_f32(0);
-    accumulator.val[1] = vdupq_n_f32(0);
-
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32((float*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
-        b_val = vld2q_f32((float*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
-        __builtin_prefetch(a_ptr+8);
-        __builtin_prefetch(b_ptr+8);
-
-        // multiply the real*real and imag*imag to get real result
-        // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
-        tmp_real.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]);
-        // a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
-        tmp_real.val[1] = vmulq_f32(a_val.val[1], b_val.val[1]);
-
-        // Multiply cross terms to get the imaginary result
-        // a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
-        tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[1]);
-        // a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
-        tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]);
-
-        c_val.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
-        c_val.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
-
-        accumulator.val[0] = vaddq_f32(accumulator.val[0], c_val.val[0]);
-        accumulator.val[1] = vaddq_f32(accumulator.val[1], c_val.val[1]);
-
-        a_ptr += 4;
-        b_ptr += 4;
-    }
-    lv_32fc_t accum_result[4];
-    vst2q_f32((float*)accum_result, accumulator);
-    *result = accum_result[0] + accum_result[1] + accum_result[2] + 
accum_result[3];
-
-    // tail case
-    for(number = quarter_points*4; number < num_points; ++number) {
-        *result += (*a_ptr++) * (*b_ptr++);
-    }
-
-}
-#endif /*LV_HAVE_NEON*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_32fc_x2_dot_prod_32fc_neon_opttests(lv_32fc_t* result, 
const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-    unsigned int quarter_points = num_points / 4;
-    unsigned int number;
-
-    lv_32fc_t* a_ptr = (lv_32fc_t*) taps;
-    lv_32fc_t* b_ptr = (lv_32fc_t*) input;
-    // for 2-lane vectors, 1st lane holds the real part,
-    // 2nd lane holds the imaginary part
-    float32x4x2_t a_val, b_val, accumulator;
-    float32x4x2_t tmp_imag;
-    accumulator.val[0] = vdupq_n_f32(0);
-    accumulator.val[1] = vdupq_n_f32(0);
-
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32((float*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
-        b_val = vld2q_f32((float*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
-        __builtin_prefetch(a_ptr+8);
-        __builtin_prefetch(b_ptr+8);
-
-        // do the first multiply
-        tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]);
-        tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]);
-
-        // use multiply accumulate/subtract to get result
-        tmp_imag.val[1] = vmlaq_f32(tmp_imag.val[1], a_val.val[0], 
b_val.val[1]);
-        tmp_imag.val[0] = vmlsq_f32(tmp_imag.val[0], a_val.val[1], 
b_val.val[1]);
-
-        accumulator.val[0] = vaddq_f32(accumulator.val[0], tmp_imag.val[0]);
-        accumulator.val[1] = vaddq_f32(accumulator.val[1], tmp_imag.val[1]);
-
-        // increment pointers
-        a_ptr += 4;
-        b_ptr += 4;
-    }
-    lv_32fc_t accum_result[4];
-    vst2q_f32((float*)accum_result, accumulator);
-    *result = accum_result[0] + accum_result[1] + accum_result[2] + 
accum_result[3];
-
-    // tail case
-    for(number = quarter_points*4; number < num_points; ++number) {
-        *result += (*a_ptr++) * (*b_ptr++);
-    }
-
-}
-#endif /*LV_HAVE_NEON*/
-
-#ifdef LV_HAVE_NEON
-static inline void volk_32fc_x2_dot_prod_32fc_neon_optfma(lv_32fc_t* result, 
const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-    unsigned int quarter_points = num_points / 4;
-    unsigned int number;
-
-    lv_32fc_t* a_ptr = (lv_32fc_t*) taps;
-    lv_32fc_t* b_ptr = (lv_32fc_t*) input;
-    // for 2-lane vectors, 1st lane holds the real part,
-    // 2nd lane holds the imaginary part
-    float32x4x2_t a_val, b_val, accumulator1, accumulator2;
-    accumulator1.val[0] = vdupq_n_f32(0);
-    accumulator1.val[1] = vdupq_n_f32(0);
-    accumulator2.val[0] = vdupq_n_f32(0);
-    accumulator2.val[1] = vdupq_n_f32(0);
-
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32((float*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
-        b_val = vld2q_f32((float*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
-        __builtin_prefetch(a_ptr+8);
-        __builtin_prefetch(b_ptr+8);
-
-        // use 2 accumulators to remove inter-instruction data dependencies
-        accumulator1.val[0] = vmlaq_f32(accumulator1.val[0], a_val.val[0], 
b_val.val[0]);
-        accumulator1.val[1] = vmlaq_f32(accumulator1.val[1], a_val.val[0], 
b_val.val[1]);
-        accumulator2.val[0] = vmlsq_f32(accumulator2.val[0], a_val.val[1], 
b_val.val[1]);
-        accumulator2.val[1] = vmlaq_f32(accumulator2.val[1], a_val.val[1], 
b_val.val[0]);
-        // increment pointers
-        a_ptr += 4;
-        b_ptr += 4;
-    }
-    accumulator1.val[0] = vaddq_f32(accumulator1.val[0], accumulator2.val[0]);
-    accumulator1.val[1] = vaddq_f32(accumulator1.val[1], accumulator2.val[1]);
-    lv_32fc_t accum_result[4];
-    vst2q_f32((float*)accum_result, accumulator1);
-    *result = accum_result[0] + accum_result[1] + accum_result[2] + 
accum_result[3];
-
-    // tail case
-    for(number = quarter_points*4; number < num_points; ++number) {
-        *result += (*a_ptr++) * (*b_ptr++);
-    }
-
-}
-#endif /*LV_HAVE_NEON*/
-
-#ifdef LV_HAVE_NEON
-static inline void volk_32fc_x2_dot_prod_32fc_neon_optfmaunroll(lv_32fc_t* 
result, const lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) 
{
-// NOTE: GCC does a poor job with this kernel, but the euivalent ASM code is 
very fast
-
-    unsigned int quarter_points = num_points / 8;
-    unsigned int number;
-
-    lv_32fc_t* a_ptr = (lv_32fc_t*) taps;
-    lv_32fc_t* b_ptr = (lv_32fc_t*) input;
-    // for 2-lane vectors, 1st lane holds the real part,
-    // 2nd lane holds the imaginary part
-    float32x4x4_t a_val, b_val, accumulator1, accumulator2;
-    float32x4x2_t reduced_accumulator;
-    accumulator1.val[0] = vdupq_n_f32(0);
-    accumulator1.val[1] = vdupq_n_f32(0);
-    accumulator1.val[2] = vdupq_n_f32(0);
-    accumulator1.val[3] = vdupq_n_f32(0);
-    accumulator2.val[0] = vdupq_n_f32(0);
-    accumulator2.val[1] = vdupq_n_f32(0);
-    accumulator2.val[2] = vdupq_n_f32(0);
-    accumulator2.val[3] = vdupq_n_f32(0);
-
-    // 8 input regs, 8 accumulators -> 16/16 neon regs are used
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld4q_f32((float*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
-        b_val = vld4q_f32((float*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
-        __builtin_prefetch(a_ptr+8);
-        __builtin_prefetch(b_ptr+8);
-
-        // use 2 accumulators to remove inter-instruction data dependencies
-        accumulator1.val[0] = vmlaq_f32(accumulator1.val[0], a_val.val[0], 
b_val.val[0]);
-        accumulator1.val[1] = vmlaq_f32(accumulator1.val[1], a_val.val[0], 
b_val.val[1]);
-
-        accumulator1.val[2] = vmlaq_f32(accumulator1.val[2], a_val.val[2], 
b_val.val[2]);
-        accumulator1.val[3] = vmlaq_f32(accumulator1.val[3], a_val.val[2], 
b_val.val[3]);
-
-        accumulator2.val[0] = vmlsq_f32(accumulator2.val[0], a_val.val[1], 
b_val.val[1]);
-        accumulator2.val[1] = vmlaq_f32(accumulator2.val[1], a_val.val[1], 
b_val.val[0]);
-
-        accumulator2.val[2] = vmlsq_f32(accumulator2.val[2], a_val.val[3], 
b_val.val[3]);
-        accumulator2.val[3] = vmlaq_f32(accumulator2.val[3], a_val.val[3], 
b_val.val[2]);
-        // increment pointers
-        a_ptr += 8;
-        b_ptr += 8;
-    }
-    // reduce 8 accumulator lanes down to 2 (1 real and 1 imag)
-    accumulator1.val[0] = vaddq_f32(accumulator1.val[0], accumulator1.val[2]);
-    accumulator1.val[1] = vaddq_f32(accumulator1.val[1], accumulator1.val[3]);
-    accumulator2.val[0] = vaddq_f32(accumulator2.val[0], accumulator2.val[2]);
-    accumulator2.val[1] = vaddq_f32(accumulator2.val[1], accumulator2.val[3]);
-    reduced_accumulator.val[0] = vaddq_f32(accumulator1.val[0], 
accumulator2.val[0]);
-    reduced_accumulator.val[1] = vaddq_f32(accumulator1.val[1], 
accumulator2.val[1]);
-    // now reduce accumulators to scalars
-    lv_32fc_t accum_result[4];
-    vst2q_f32((float*)accum_result, reduced_accumulator);
-    *result = accum_result[0] + accum_result[1] + accum_result[2] + 
accum_result[3];
-
-    // tail case
-    for(number = quarter_points*8; number < num_points; ++number) {
-        *result += (*a_ptr++) * (*b_ptr++);
-    }
-
-}
-#endif /*LV_HAVE_NEON*/
-
-
-#ifdef LV_HAVE_AVX
-
-#include <immintrin.h>
-
-static inline void volk_32fc_x2_dot_prod_32fc_a_avx(lv_32fc_t* result, const 
lv_32fc_t* input, const lv_32fc_t* taps, unsigned int num_points) {
-
-  unsigned int isodd = num_points & 3;
-  unsigned int i = 0;
-  lv_32fc_t dotProduct;
-  memset(&dotProduct, 0x0, 2*sizeof(float));
-
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  __m256 x, y, yl, yh, z, tmp1, tmp2, dotProdVal;
-
-  const lv_32fc_t* a = input;
-  const lv_32fc_t* b = taps;
-
-  dotProdVal = _mm256_setzero_ps();
-
-  for(;number < quarterPoints; number++){
-
-    x = _mm256_load_ps((float*)a); // Load a,b,e,f as ar,ai,br,bi,er,ei,fr,fi
-    y = _mm256_load_ps((float*)b); // Load c,d,g,h as cr,ci,dr,di,gr,gi,hr,hi
-
-    yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr,gr,gr,hr,hr
-    yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di,gi,gi,hi,hi
-
-    tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr ...
-
-    x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be 
ai,ar,bi,br,ei,er,fi,fr
-
-    tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di ...
-
-    z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-    dotProdVal = _mm256_add_ps(dotProdVal, z); // Add the complex 
multiplication results together
-
-    a += 4;
-    b += 4;
-  }
-
-  __VOLK_ATTR_ALIGNED(32) lv_32fc_t dotProductVector[4];
-
-  _mm256_store_ps((float*)dotProductVector,dotProdVal); // Store the results 
back into the dot product vector
-
-  dotProduct += ( dotProductVector[0] + dotProductVector[1] + 
dotProductVector[2] + dotProductVector[3]);
-
-  for(i = num_points-isodd; i < num_points; i++) {
-    dotProduct += input[i] * taps[i];
-  }
-
-  *result = dotProduct;
-}
-
-#endif /*LV_HAVE_AVX*/
-
-#endif /*INCLUDED_volk_32fc_x2_dot_prod_32fc_a_H*/
diff --git a/volk/kernels/volk/volk_32fc_x2_multiply_32fc.h 
b/volk/kernels/volk/volk_32fc_x2_multiply_32fc.h
deleted file mode 100644
index 4575439..0000000
--- a/volk/kernels/volk/volk_32fc_x2_multiply_32fc.h
+++ /dev/null
@@ -1,401 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_u_H
-#define INCLUDED_volk_32fc_x2_multiply_32fc_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-#include <float.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_32fc_u_avx(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-  unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m256 x, y, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-    const lv_32fc_t* b = bVector;
-
-    for(;number < quarterPoints; number++){
-
-      x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi ... as 
ar,ai,br,bi ...
-      y = _mm256_loadu_ps((float*)b); // Load the cr + ci, dr + di ... as 
cr,ci,dr,di ...
-
-      yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr ...
-      yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di ...
-
-      tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr ...
-
-      x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br ...
-
-      tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, 
br*dr-bi*di, bi*dr+br*di
-
-      _mm256_storeu_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 4;
-      b += 4;
-      c += 4;
-    }
-
-    number = quarterPoints * 4;
-
-    for(; number < num_points; number++) {
-      *c++ = (*a++) * (*b++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_32fc_u_sse3(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-  unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    __m128 x, y, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-    const lv_32fc_t* b = bVector;
-
-    for(;number < halfPoints; number++){
-
-      x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-      y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-
-      yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-      yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-
-      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-      _mm_storeu_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 2;
-      b += 2;
-      c += 2;
-    }
-
-    if((num_points % 2) != 0) {
-      *c = (*a) * (*b);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_32fc_generic(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    const lv_32fc_t* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_u_H */
-#ifndef INCLUDED_volk_32fc_x2_multiply_32fc_a_H
-#define INCLUDED_volk_32fc_x2_multiply_32fc_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-#include <float.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_32fc_a_avx(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m256 x, y, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-    const lv_32fc_t* b = bVector;
-
-    for(;number < quarterPoints; number++){
-
-      x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi ... as 
ar,ai,br,bi ...
-      y = _mm256_load_ps((float*)b); // Load the cr + ci, dr + di ... as 
cr,ci,dr,di ...
-
-      yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr ...
-      yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di ...
-
-      tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr ...
-
-      x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br ...
-
-      tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, 
br*dr-bi*di, bi*dr+br*di
-
-      _mm256_store_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 4;
-      b += 4;
-      c += 4;
-    }
-
-    number = quarterPoints * 4;
-
-    for(; number < num_points; number++) {
-      *c++ = (*a++) * (*b++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_32fc_a_sse3(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-  unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    __m128 x, y, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-    const lv_32fc_t* b = bVector;
-    for(;number < halfPoints; number++){
-
-      x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-      y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-
-      yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-      yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-
-      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-      _mm_store_ps((float*)c,z); // Store the results back into the C container
-
-      a += 2;
-      b += 2;
-      c += 2;
-    }
-
-    if((num_points % 2) != 0) {
-      *c = (*a) * (*b);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_32fc_a_generic(lv_32fc_t* cVector, 
const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    const lv_32fc_t* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) * (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_32fc_neon(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-
-    lv_32fc_t *a_ptr = (lv_32fc_t*) aVector;
-    lv_32fc_t *b_ptr = (lv_32fc_t*) bVector;
-    unsigned int quarter_points = num_points / 4;
-    float32x4x2_t a_val, b_val, c_val;
-    float32x4x2_t tmp_real, tmp_imag;
-    unsigned int number = 0;
-
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32((float*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
-        b_val = vld2q_f32((float*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
-        __builtin_prefetch(a_ptr+4);
-        __builtin_prefetch(b_ptr+4);
-
-        // multiply the real*real and imag*imag to get real result
-        // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
-        tmp_real.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]);
-        // a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
-        tmp_real.val[1] = vmulq_f32(a_val.val[1], b_val.val[1]);
-
-        // Multiply cross terms to get the imaginary result
-        // a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
-        tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[1]);
-        // a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
-        tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]);
-
-        // store the results
-        c_val.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
-        c_val.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
-        vst2q_f32((float*)cVector, c_val);
-
-        a_ptr += 4;
-        b_ptr += 4;
-        cVector += 4;
-    }
-
-    for(number = quarter_points*4; number < num_points; number++){
-      *cVector++ = (*a_ptr++) * (*b_ptr++);
-    }
-
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_NEON
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_32fc_neon_opttests(lv_32fc_t* 
cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int 
num_points){
-
-    lv_32fc_t *a_ptr = (lv_32fc_t*) aVector;
-    lv_32fc_t *b_ptr = (lv_32fc_t*) bVector;
-    unsigned int quarter_points = num_points / 4;
-    float32x4x2_t a_val, b_val;
-    float32x4x2_t tmp_imag;
-    unsigned int number = 0;
-
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32((float*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
-        b_val = vld2q_f32((float*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
-        __builtin_prefetch(a_ptr+4);
-        __builtin_prefetch(b_ptr+4);
-
-        // do the first multiply
-        tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]);
-        tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]);
-
-        // use multiply accumulate/subtract to get result
-        tmp_imag.val[1] = vmlaq_f32(tmp_imag.val[1], a_val.val[0], 
b_val.val[1]);
-        tmp_imag.val[0] = vmlsq_f32(tmp_imag.val[0], a_val.val[1], 
b_val.val[1]);
-
-        // store
-        vst2q_f32((float*)cVector, tmp_imag);
-        // increment pointers
-        a_ptr += 4;
-        b_ptr += 4;
-        cVector += 4;
-    }
-
-    for(number = quarter_points*4; number < num_points; number++){
-      *cVector++ = (*a_ptr++) * (*b_ptr++);
-    }
-
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_NEON
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-extern void volk_32fc_x2_multiply_32fc_neonasm(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_ORC
-  /*!
-    \brief Multiplies the two input complex vectors and stores their results 
in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector One of the vectors to be multiplied
-    \param bVector One of the vectors to be multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-extern void volk_32fc_x2_multiply_32fc_a_orc_impl(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points);
-static inline void volk_32fc_x2_multiply_32fc_u_orc(lv_32fc_t* cVector, const 
lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
-    volk_32fc_x2_multiply_32fc_a_orc_impl(cVector, aVector, bVector, 
num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-
-
-
-#endif /* INCLUDED_volk_32fc_x2_multiply_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h 
b/volk/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h
deleted file mode 100644
index c9b3bbc..0000000
--- a/volk/kernels/volk/volk_32fc_x2_multiply_conjugate_32fc.h
+++ /dev/null
@@ -1,343 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
-#define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-#include <float.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies vector a by the conjugate of vector b and stores the 
results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector First vector to be multiplied
-    \param bVector Second vector that is conjugated before being multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_conjugate_32fc_u_avx(lv_32fc_t* 
cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int 
num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m256 x, y, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-    const lv_32fc_t* b = bVector;
-
-    __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
-
-    for(;number < quarterPoints; number++){
-
-      x = _mm256_loadu_ps((float*)a); // Load the ar + ai, br + bi ... as 
ar,ai,br,bi ...
-      y = _mm256_loadu_ps((float*)b); // Load the cr + ci, dr + di ... as 
cr,ci,dr,di ...
-
-      y = _mm256_xor_ps(y, conjugator); // conjugate y
-
-      yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr ...
-      yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di ...
-
-      tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr ...
-
-      x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br ...
-
-      tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di ...
-
-      z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, 
br*dr-bi*di, bi*dr+br*di ...
-
-      _mm256_storeu_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 4;
-      b += 4;
-      c += 4;
-    }
-
-    number = quarterPoints * 4;
-
-    for(; number < num_points; number++) {
-      *c++ = (*a++) * lv_conj(*b++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Multiplies vector a by the conjugate of vector b and stores the 
results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector First vector to be multiplied
-    \param bVector Second vector that is conjugated before being multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* 
cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int 
num_points){
-  unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    __m128 x, y, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-    const lv_32fc_t* b = bVector;
-
-    __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
-
-    for(;number < halfPoints; number++){
-
-      x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-      y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-
-      y = _mm_xor_ps(y, conjugator); // conjugate y
-
-      yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-      yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-
-      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-      _mm_storeu_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 2;
-      b += 2;
-      c += 2;
-    }
-
-    if((num_points % 2) != 0) {
-      *c = (*a) * lv_conj(*b);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies vector a by the conjugate of vector b and stores the 
results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector First vector to be multiplied
-    \param bVector Second vector that is conjugated before being multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_conjugate_32fc_generic(lv_32fc_t* 
cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int 
num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    const lv_32fc_t* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H */
-#ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
-#define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-#include <float.h>
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-  /*!
-    \brief Multiplies vector a by the conjugate of vector b and stores the 
results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector First vector to be multiplied
-    \param bVector Second vector that is conjugated before being multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_conjugate_32fc_a_avx(lv_32fc_t* 
cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int 
num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    __m256 x, y, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-    const lv_32fc_t* b = bVector;
-
-    __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);
-
-    for(;number < quarterPoints; number++){
-
-      x = _mm256_load_ps((float*)a); // Load the ar + ai, br + bi ... as 
ar,ai,br,bi ...
-      y = _mm256_load_ps((float*)b); // Load the cr + ci, dr + di ... as 
cr,ci,dr,di ...
-
-      y = _mm256_xor_ps(y, conjugator); // conjugate y
-
-      yl = _mm256_moveldup_ps(y); // Load yl with cr,cr,dr,dr ...
-      yh = _mm256_movehdup_ps(y); // Load yh with ci,ci,di,di ...
-
-      tmp1 = _mm256_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr ...
-
-      x = _mm256_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br ...
-
-      tmp2 = _mm256_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di ...
-
-      z = _mm256_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, 
br*dr-bi*di, bi*dr+br*di ...
-
-      _mm256_store_ps((float*)c,z); // Store the results back into the C 
container
-
-      a += 4;
-      b += 4;
-      c += 4;
-    }
-
-    number = quarterPoints * 4;
-
-    for(; number < num_points; number++) {
-      *c++ = (*a++) * lv_conj(*b++);
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-  /*!
-    \brief Multiplies vector a by the conjugate of vector b and stores the 
results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector First vector to be multiplied
-    \param bVector Second vector that is conjugated before being multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* 
cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int 
num_points){
-  unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    __m128 x, y, yl, yh, z, tmp1, tmp2;
-    lv_32fc_t* c = cVector;
-    const lv_32fc_t* a = aVector;
-    const lv_32fc_t* b = bVector;
-
-    __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
-
-    for(;number < halfPoints; number++){
-
-      x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
-      y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
-
-      y = _mm_xor_ps(y, conjugator); // conjugate y
-
-      yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
-      yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
-
-      tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
-
-      x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
-
-      tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
-
-      z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, 
bi*dr+br*di
-
-      _mm_store_ps((float*)c,z); // Store the results back into the C container
-
-      a += 2;
-      b += 2;
-      c += 2;
-    }
-
-    if((num_points % 2) != 0) {
-      *c = (*a) * lv_conj(*b);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-  /*!
-    \brief Multiplies vector a by the conjugate of vector b and stores the 
results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector First vector to be multiplied
-    \param bVector Second vector that is conjugated before being multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_conjugate_32fc_neon(lv_32fc_t* 
cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int 
num_points){
-
-    lv_32fc_t *a_ptr = (lv_32fc_t*) aVector;
-    lv_32fc_t *b_ptr = (lv_32fc_t*) bVector;
-    unsigned int quarter_points = num_points / 4;
-    float32x4x2_t a_val, b_val, c_val;
-    float32x4x2_t tmp_real, tmp_imag;
-    unsigned int number = 0;
-
-    for(number = 0; number < quarter_points; ++number) {
-        a_val = vld2q_f32((float*)a_ptr); // a0r|a1r|a2r|a3r || a0i|a1i|a2i|a3i
-        b_val = vld2q_f32((float*)b_ptr); // b0r|b1r|b2r|b3r || b0i|b1i|b2i|b3i
-        b_val.val[1] = vnegq_f32(b_val.val[1]);
-        __builtin_prefetch(a_ptr+4);
-        __builtin_prefetch(b_ptr+4);
-
-        // multiply the real*real and imag*imag to get real result
-        // a0r*b0r|a1r*b1r|a2r*b2r|a3r*b3r
-        tmp_real.val[0] = vmulq_f32(a_val.val[0], b_val.val[0]);
-        // a0i*b0i|a1i*b1i|a2i*b2i|a3i*b3i
-        tmp_real.val[1] = vmulq_f32(a_val.val[1], b_val.val[1]);
-
-        // Multiply cross terms to get the imaginary result
-        // a0r*b0i|a1r*b1i|a2r*b2i|a3r*b3i
-        tmp_imag.val[0] = vmulq_f32(a_val.val[0], b_val.val[1]);
-        // a0i*b0r|a1i*b1r|a2i*b2r|a3i*b3r
-        tmp_imag.val[1] = vmulq_f32(a_val.val[1], b_val.val[0]);
-
-        // store the results
-        c_val.val[0] = vsubq_f32(tmp_real.val[0], tmp_real.val[1]);
-        c_val.val[1] = vaddq_f32(tmp_imag.val[0], tmp_imag.val[1]);
-        vst2q_f32((float*)cVector, c_val);
-
-        a_ptr += 4;
-        b_ptr += 4;
-        cVector += 4;
-    }
-
-    for(number = quarter_points*4; number < num_points; number++){
-      *cVector++ = (*a_ptr++) * conj(*b_ptr++);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Multiplies vector a by the conjugate of vector b and stores the 
results in the third vector
-    \param cVector The vector where the results will be stored
-    \param aVector First vector to be multiplied
-    \param bVector Second vector that is conjugated before being multiplied
-    \param num_points The number of complex values in aVector and bVector to 
be multiplied together and stored into cVector
-  */
-static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* 
cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int 
num_points){
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-    const lv_32fc_t* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */
diff --git a/volk/kernels/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h 
b/volk/kernels/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h
deleted file mode 100644
index a25a686..0000000
--- a/volk/kernels/volk/volk_32fc_x2_s32f_square_dist_scalar_mult_32f.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H
-#define INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H
-
-#include<inttypes.h>
-#include<stdio.h>
-#include<volk/volk_complex.h>
-#include <string.h>
-
-#ifdef LV_HAVE_SSE3
-#include<xmmintrin.h>
-#include<pmmintrin.h>
-
-static inline void volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_sse3(float* 
target, lv_32fc_t* src0, lv_32fc_t* points, float scalar, unsigned int 
num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-
-  __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
-
-  lv_32fc_t diff;
-  memset(&diff, 0x0, 2*sizeof(float));
-
-  float sq_dist = 0.0;
-  int bound = num_bytes >> 5;
-  int leftovers0 = (num_bytes >> 4) & 1;
-  int leftovers1 = (num_bytes >> 3) & 1;
-  int i = 0;
-
-
-
-  xmm1 = _mm_setzero_ps();
-  xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0);
-  xmm2 = _mm_load_ps((float*)&points[0]);
-  xmm8 = _mm_load1_ps(&scalar);
-  xmm1 = _mm_movelh_ps(xmm1, xmm1);
-  xmm3 = _mm_load_ps((float*)&points[2]);
-
-
-  for(; i < bound - 1; ++i) {
-
-    xmm4 = _mm_sub_ps(xmm1, xmm2);
-    xmm5 = _mm_sub_ps(xmm1, xmm3);
-    points += 4;
-    xmm6 = _mm_mul_ps(xmm4, xmm4);
-    xmm7 = _mm_mul_ps(xmm5, xmm5);
-
-    xmm2 = _mm_load_ps((float*)&points[0]);
-
-    xmm4 = _mm_hadd_ps(xmm6, xmm7);
-
-    xmm3 = _mm_load_ps((float*)&points[2]);
-
-    xmm4 = _mm_mul_ps(xmm4, xmm8);
-
-    _mm_store_ps(target, xmm4);
-
-    target += 4;
-
-  }
-
-  xmm4 = _mm_sub_ps(xmm1, xmm2);
-  xmm5 = _mm_sub_ps(xmm1, xmm3);
-
-
-
-  points += 4;
-  xmm6 = _mm_mul_ps(xmm4, xmm4);
-  xmm7 = _mm_mul_ps(xmm5, xmm5);
-
-  xmm4 = _mm_hadd_ps(xmm6, xmm7);
-
-  xmm4 = _mm_mul_ps(xmm4, xmm8);
-
-  _mm_store_ps(target, xmm4);
-
-  target += 4;
-
-
-  for(i = 0; i < leftovers0; ++i) {
-
-    xmm2 = _mm_load_ps((float*)&points[0]);
-
-    xmm4 = _mm_sub_ps(xmm1, xmm2);
-
-    points += 2;
-
-    xmm6 = _mm_mul_ps(xmm4, xmm4);
-
-    xmm4 = _mm_hadd_ps(xmm6, xmm6);
-
-    xmm4 = _mm_mul_ps(xmm4, xmm8);
-
-    _mm_storeh_pi((__m64*)target, xmm4);
-
-    target += 2;
-  }
-
-  for(i = 0; i < leftovers1; ++i) {
-
-    diff = src0[0] - points[0];
-
-    sq_dist = scalar * (lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * 
lv_cimag(diff));
-
-    target[0] = sq_dist;
-  }
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-#ifdef LV_HAVE_GENERIC
-static inline void 
volk_32fc_x2_s32f_square_dist_scalar_mult_32f_generic(float* target, lv_32fc_t* 
src0, lv_32fc_t* points, float scalar, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-
-  lv_32fc_t diff;
-  float sq_dist;
-  unsigned int i = 0;
-
-  for(; i < num_bytes >> 3; ++i) {
-    diff = src0[0] - points[i];
-
-    sq_dist = scalar * (lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * 
lv_cimag(diff));
-
-    target[i] = sq_dist;
-  }
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#endif /*INCLUDED_volk_32fc_x2_s32f_square_dist_scalar_mult_32f_a_H*/
diff --git a/volk/kernels/volk/volk_32fc_x2_square_dist_32f.h 
b/volk/kernels/volk/volk_32fc_x2_square_dist_32f.h
deleted file mode 100644
index 2984bea..0000000
--- a/volk/kernels/volk/volk_32fc_x2_square_dist_32f.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32fc_x2_square_dist_32f_a_H
-#define INCLUDED_volk_32fc_x2_square_dist_32f_a_H
-
-#include<inttypes.h>
-#include<stdio.h>
-#include<volk/volk_complex.h>
-
-#ifdef LV_HAVE_SSE3
-#include<xmmintrin.h>
-#include<pmmintrin.h>
-
-static inline void volk_32fc_x2_square_dist_32f_a_sse3(float* target, 
lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-
-  __m128 xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
-
-  lv_32fc_t diff;
-  float sq_dist;
-  int bound = num_bytes >> 5;
-  int leftovers0 = (num_bytes >> 4) & 1;
-  int leftovers1 = (num_bytes >> 3) & 1;
-  int i = 0;
-
-  xmm1 = _mm_setzero_ps();
-  xmm1 = _mm_loadl_pi(xmm1, (__m64*)src0);
-  xmm2 = _mm_load_ps((float*)&points[0]);
-  xmm1 = _mm_movelh_ps(xmm1, xmm1);
-  xmm3 = _mm_load_ps((float*)&points[2]);
-
-
-  for(; i < bound - 1; ++i) {
-    xmm4 = _mm_sub_ps(xmm1, xmm2);
-    xmm5 = _mm_sub_ps(xmm1, xmm3);
-    points += 4;
-    xmm6 = _mm_mul_ps(xmm4, xmm4);
-    xmm7 = _mm_mul_ps(xmm5, xmm5);
-
-    xmm2 = _mm_load_ps((float*)&points[0]);
-
-    xmm4 = _mm_hadd_ps(xmm6, xmm7);
-
-    xmm3 = _mm_load_ps((float*)&points[2]);
-
-    _mm_store_ps(target, xmm4);
-
-    target += 4;
-
-  }
-
-  xmm4 = _mm_sub_ps(xmm1, xmm2);
-  xmm5 = _mm_sub_ps(xmm1, xmm3);
-
-
-
-  points += 4;
-  xmm6 = _mm_mul_ps(xmm4, xmm4);
-  xmm7 = _mm_mul_ps(xmm5, xmm5);
-
-  xmm4 = _mm_hadd_ps(xmm6, xmm7);
-
-  _mm_store_ps(target, xmm4);
-
-  target += 4;
-
-  for(i = 0; i < leftovers0; ++i) {
-
-    xmm2 = _mm_load_ps((float*)&points[0]);
-
-    xmm4 = _mm_sub_ps(xmm1, xmm2);
-
-    points += 2;
-
-    xmm6 = _mm_mul_ps(xmm4, xmm4);
-
-    xmm4 = _mm_hadd_ps(xmm6, xmm6);
-
-    _mm_storeh_pi((__m64*)target, xmm4);
-
-    target += 2;
-  }
-
-  for(i = 0; i < leftovers1; ++i) {
-
-    diff = src0[0] - points[0];
-
-    sq_dist = lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * 
lv_cimag(diff);
-
-    target[0] = sq_dist;
-  }
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_32fc_x2_square_dist_32f_neon(float* target, lv_32fc_t* 
src0, lv_32fc_t* points, unsigned int num_points) {
-    const unsigned int quarter_points = num_points / 4;
-    unsigned int number;
-
-    float32x4x2_t a_vec, b_vec;
-    float32x4x2_t diff_vec;
-    float32x4_t tmp, tmp1, dist_sq;
-    a_vec.val[0] = vdupq_n_f32( lv_creal(src0[0]) );
-    a_vec.val[1] = vdupq_n_f32( lv_cimag(src0[0]) );
-    for(number=0; number < quarter_points; ++number) {
-        b_vec = vld2q_f32((float*)points);
-        diff_vec.val[0] = vsubq_f32(a_vec.val[0], b_vec.val[0]);
-        diff_vec.val[1] = vsubq_f32(a_vec.val[1], b_vec.val[1]);
-        tmp = vmulq_f32(diff_vec.val[0], diff_vec.val[0]);
-        tmp1 = vmulq_f32(diff_vec.val[1], diff_vec.val[1]);
-
-        dist_sq = vaddq_f32(tmp, tmp1);
-        vst1q_f32(target, dist_sq);
-        points += 4;
-        target += 4;
-    }
-    for(number=quarter_points*4; number < num_points; ++number) {
-        lv_32fc_t diff = src0[0] - *points++;
-        *target++ = lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * 
lv_cimag(diff);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_32fc_x2_square_dist_32f_generic(float* target, 
lv_32fc_t* src0, lv_32fc_t* points, unsigned int num_points) {
-
-  const unsigned int num_bytes = num_points*8;
-
-  lv_32fc_t diff;
-  float sq_dist;
-  unsigned int i = 0;
-
-  for(; i < num_bytes >> 3; ++i) {
-    diff = src0[0] - points[i];
-
-    sq_dist = lv_creal(diff) * lv_creal(diff) + lv_cimag(diff) * 
lv_cimag(diff);
-
-    target[i] = sq_dist;
-  }
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-
-#endif /*INCLUDED_volk_32fc_x2_square_dist_32f_a_H*/
diff --git a/volk/kernels/volk/volk_32i_s32f_convert_32f.h 
b/volk/kernels/volk/volk_32i_s32f_convert_32f.h
deleted file mode 100644
index dd965bc..0000000
--- a/volk/kernels/volk/volk_32i_s32f_convert_32f.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
-#define INCLUDED_volk_32i_s32f_convert_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-
-  /*!
-    \brief Converts the input 32 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 32 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-    \note Output buffer does NOT need to be properly aligned
-  */
-static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector, const 
int32_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-     float* outputVectorPtr = outputVector;
-     const float iScalar = 1.0 / scalar;
-    __m128 invScalar = _mm_set_ps1(iScalar);
-    int32_t* inputPtr = (int32_t*)inputVector;
-    __m128i inputVal;
-    __m128 ret;
-
-    for(;number < quarterPoints; number++){
-
-      // Load the 4 values
-      inputVal = _mm_loadu_si128((__m128i*)inputPtr);
-
-      ret = _mm_cvtepi32_ps(inputVal);
-      ret = _mm_mul_ps(ret, invScalar);
-
-      _mm_storeu_ps(outputVectorPtr, ret);
-
-      outputVectorPtr += 4;
-      inputPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      outputVector[number] =((float)(inputVector[number])) * iScalar;
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the input 32 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 32 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-    \note Output buffer does NOT need to be properly aligned
-  */
-static inline void volk_32i_s32f_convert_32f_generic(float* outputVector, 
const int32_t* inputVector, const float scalar, unsigned int num_points){
-  float* outputVectorPtr = outputVector;
-  const int32_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  const float iScalar = 1.0 / scalar;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
-#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
-#define INCLUDED_volk_32i_s32f_convert_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-
-  /*!
-    \brief Converts the input 32 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 32 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector, const 
int32_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-     float* outputVectorPtr = outputVector;
-     const float iScalar = 1.0 / scalar;
-    __m128 invScalar = _mm_set_ps1(iScalar);
-    int32_t* inputPtr = (int32_t*)inputVector;
-    __m128i inputVal;
-    __m128 ret;
-
-    for(;number < quarterPoints; number++){
-
-      // Load the 4 values
-      inputVal = _mm_load_si128((__m128i*)inputPtr);
-
-      ret = _mm_cvtepi32_ps(inputVal);
-      ret = _mm_mul_ps(ret, invScalar);
-
-      _mm_store_ps(outputVectorPtr, ret);
-
-      outputVectorPtr += 4;
-      inputPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(; number < num_points; number++){
-      outputVector[number] =((float)(inputVector[number])) * iScalar;
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the input 32 bit integer data into floating point data, 
and divides the each floating point output data point by the scalar value
-    \param inputVector The 32 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector, 
const int32_t* inputVector, const float scalar, unsigned int num_points){
-  float* outputVectorPtr = outputVector;
-  const int32_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  const float iScalar = 1.0 / scalar;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
diff --git a/volk/kernels/volk/volk_32i_x2_and_32i.h 
b/volk/kernels/volk/volk_32i_x2_and_32i.h
deleted file mode 100644
index c138540..0000000
--- a/volk/kernels/volk/volk_32i_x2_and_32i.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32i_x2_and_32i_a_H
-#define INCLUDED_volk_32i_x2_and_32i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Ands the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors
-  \param bVector One of the vectors
-  \param num_points The number of values in aVector and bVector to be anded 
together and stored into cVector
-*/
-static inline void volk_32i_x2_and_32i_a_sse(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = (float*)cVector;
-    const float* aPtr = (float*)aVector;
-    const float* bPtr = (float*)bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-      bVal = _mm_load_ps(bPtr);
-
-      cVal = _mm_and_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      cVector[number] = aVector[number] & bVector[number];
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Ands the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors
-  \param bVector One of the vectors
-  \param num_points The number of values in aVector and bVector to be anded 
together and stored into cVector
-*/
-static inline void volk_32i_x2_and_32i_neon(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points){
-    int32_t* cPtr = cVector;
-    const int32_t* aPtr = aVector;
-    const int32_t* bPtr=  bVector;
-    unsigned int number = 0;
-    unsigned int quarter_points = num_points / 4;
-
-    int32x4_t a_val, b_val, c_val;
-
-    for(number = 0; number < quarter_points; number++){
-        a_val = vld1q_s32(aPtr);
-        b_val = vld1q_s32(bPtr);
-        c_val = vandq_s32(a_val, b_val);
-        vst1q_s32(cPtr, c_val);
-        aPtr += 4;
-        bPtr += 4;
-        cPtr += 4;
-    }
-
-    for(number = quarter_points * 4; number < num_points; number++){
-      *cPtr++ = (*aPtr++) & (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Ands the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors
-  \param bVector One of the vectors
-  \param num_points The number of values in aVector and bVector to be anded 
together and stored into cVector
-*/
-static inline void volk_32i_x2_and_32i_generic(int32_t* cVector, const 
int32_t* aVector, const int32_t* bVector, unsigned int num_points){
-    int32_t* cPtr = cVector;
-    const int32_t* aPtr = aVector;
-    const int32_t* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) & (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Ands the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors
-  \param bVector One of the vectors
-  \param num_points The number of values in aVector and bVector to be anded 
together and stored into cVector
-*/
-extern void volk_32i_x2_and_32i_a_orc_impl(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points);
-static inline void volk_32i_x2_and_32i_u_orc(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points){
-    volk_32i_x2_and_32i_a_orc_impl(cVector, aVector, bVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32i_x2_and_32i_a_H */
diff --git a/volk/kernels/volk/volk_32i_x2_or_32i.h 
b/volk/kernels/volk/volk_32i_x2_or_32i.h
deleted file mode 100644
index 544a71c..0000000
--- a/volk/kernels/volk/volk_32i_x2_or_32i.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32i_x2_or_32i_a_H
-#define INCLUDED_volk_32i_x2_or_32i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Ors the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be ored
-  \param bVector One of the vectors to be ored
-  \param num_points The number of values in aVector and bVector to be ored 
together and stored into cVector
-*/
-static inline void volk_32i_x2_or_32i_a_sse(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int quarterPoints = num_points / 4;
-
-    float* cPtr = (float*)cVector;
-    const float* aPtr = (float*)aVector;
-    const float* bPtr = (float*)bVector;
-
-    __m128 aVal, bVal, cVal;
-    for(;number < quarterPoints; number++){
-
-      aVal = _mm_load_ps(aPtr);
-      bVal = _mm_load_ps(bPtr);
-
-      cVal = _mm_or_ps(aVal, bVal);
-
-      _mm_store_ps(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 4;
-      bPtr += 4;
-      cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-    for(;number < num_points; number++){
-      cVector[number] = aVector[number] | bVector[number];
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Ands the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors
-  \param bVector One of the vectors
-  \param num_points The number of values in aVector and bVector to be anded 
together and stored into cVector
-*/
-static inline void volk_32i_x2_or_32i_neon(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points){
-    int32_t* cPtr = cVector;
-    const int32_t* aPtr = aVector;
-    const int32_t* bPtr=  bVector;
-    unsigned int number = 0;
-    unsigned int quarter_points = num_points / 4;
-
-    int32x4_t a_val, b_val, c_val;
-
-    for(number = 0; number < quarter_points; number++){
-        a_val = vld1q_s32(aPtr);
-        b_val = vld1q_s32(bPtr);
-        c_val = vorrq_s32(a_val, b_val);
-        vst1q_s32(cPtr, c_val);
-        aPtr += 4;
-        bPtr += 4;
-        cPtr += 4;
-    }
-
-    for(number = quarter_points * 4; number < num_points; number++){
-      *cPtr++ = (*aPtr++) | (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Ors the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be ored
-  \param bVector One of the vectors to be ored
-  \param num_points The number of values in aVector and bVector to be ored 
together and stored into cVector
-*/
-static inline void volk_32i_x2_or_32i_generic(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points){
-    int32_t* cPtr = cVector;
-    const int32_t* aPtr = aVector;
-    const int32_t* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      *cPtr++ = (*aPtr++) | (*bPtr++);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-/*!
-  \brief Ors the two input vectors and store their results in the third vector
-  \param cVector The vector where the results will be stored
-  \param aVector One of the vectors to be ored
-  \param bVector One of the vectors to be ored
-  \param num_points The number of values in aVector and bVector to be ored 
together and stored into cVector
-*/
-extern void volk_32i_x2_or_32i_a_orc_impl(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points);
-static inline void volk_32i_x2_or_32i_u_orc(int32_t* cVector, const int32_t* 
aVector, const int32_t* bVector, unsigned int num_points){
-    volk_32i_x2_or_32i_a_orc_impl(cVector, aVector, bVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-#endif /* INCLUDED_volk_32i_x2_or_32i_a_H */
diff --git a/volk/kernels/volk/volk_32u_byteswap.h 
b/volk/kernels/volk/volk_32u_byteswap.h
deleted file mode 100644
index 0194efc..0000000
--- a/volk/kernels/volk/volk_32u_byteswap.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32u_byteswap_u_H
-#define INCLUDED_volk_32u_byteswap_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int32_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_32u_byteswap_u_sse2(uint32_t* intsToSwap, unsigned int 
num_points){
-  unsigned int number = 0;
-
-  uint32_t* inputPtr = intsToSwap;
-  __m128i input, byte1, byte2, byte3, byte4, output;
-  __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
-  __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
-
-  const uint64_t quarterPoints = num_points / 4;
-  for(;number < quarterPoints; number++){
-    // Load the 32t values, increment inputPtr later since we're doing it 
in-place.
-    input = _mm_loadu_si128((__m128i*)inputPtr);
-    // Do the four shifts
-    byte1 = _mm_slli_epi32(input, 24);
-    byte2 = _mm_slli_epi32(input, 8);
-    byte3 = _mm_srli_epi32(input, 8);
-    byte4 = _mm_srli_epi32(input, 24);
-    // Or bytes together
-    output = _mm_or_si128(byte1, byte4);
-    byte2 = _mm_and_si128(byte2, byte2mask);
-    output = _mm_or_si128(output, byte2);
-    byte3 = _mm_and_si128(byte3, byte3mask);
-    output = _mm_or_si128(output, byte3);
-    // Store the results
-    _mm_storeu_si128((__m128i*)inputPtr, output);
-    inputPtr += 4;
-  }
-
-  // Byteswap any remaining points:
-  number = quarterPoints*4;
-  for(; number < num_points; number++){
-    uint32_t outputVal = *inputPtr;
-    outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) 
| ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
-    *inputPtr = outputVal;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int32_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_32u_byteswap_neon(uint32_t* intsToSwap, unsigned int 
num_points){
-  uint32_t* inputPtr = intsToSwap;
-  unsigned int number = 0;
-  unsigned int n8points = num_points / 8;
-
-  uint8x8x4_t input_table;
-  uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
-  uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;
-
-  /* these magic numbers are used as byte-indeces in the LUT.
-     they are pre-computed to save time. A simple C program
-     can calculate them; for example for lookup01:
-    uint8_t chars[8] = {24, 16, 8, 0, 25, 17, 9, 1};
-    for(ii=0; ii < 8; ++ii) {
-        index += ((uint64_t)(*(chars+ii))) << (ii*8);
-    }
-  */
-  int_lookup01 = vcreate_u8(74609667900706840);
-  int_lookup23 = vcreate_u8(219290013576860186);
-  int_lookup45 = vcreate_u8(363970359253013532);
-  int_lookup67 = vcreate_u8(508650704929166878);
-  
-  for(number = 0; number < n8points; ++number){
-    input_table = vld4_u8((uint8_t*) inputPtr);
-    swapped_int01 = vtbl4_u8(input_table, int_lookup01);
-    swapped_int23 = vtbl4_u8(input_table, int_lookup23);
-    swapped_int45 = vtbl4_u8(input_table, int_lookup45);
-    swapped_int67 = vtbl4_u8(input_table, int_lookup67);
-    vst1_u8((uint8_t*) inputPtr, swapped_int01);
-    vst1_u8((uint8_t*) (inputPtr+2), swapped_int23);
-    vst1_u8((uint8_t*) (inputPtr+4), swapped_int45);
-    vst1_u8((uint8_t*) (inputPtr+6), swapped_int67);
-
-    inputPtr += 8;
-  }
-
-  for(number = n8points * 8; number < num_points; ++number){
-    uint32_t output = *inputPtr;
-    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | 
((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));
-
-    *inputPtr = output;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int32_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_32u_byteswap_generic(uint32_t* intsToSwap, unsigned 
int num_points){
-  uint32_t* inputPtr = intsToSwap;
-
-  unsigned int point;
-  for(point = 0; point < num_points; point++){
-    uint32_t output = *inputPtr;
-    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | 
((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));
-
-    *inputPtr = output;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_32u_byteswap_u_H */
-#ifndef INCLUDED_volk_32u_byteswap_a_H
-#define INCLUDED_volk_32u_byteswap_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int32_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int 
num_points){
-  unsigned int number = 0;
-
-  uint32_t* inputPtr = intsToSwap;
-  __m128i input, byte1, byte2, byte3, byte4, output;
-  __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
-  __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
-
-  const uint64_t quarterPoints = num_points / 4;
-  for(;number < quarterPoints; number++){
-    // Load the 32t values, increment inputPtr later since we're doing it 
in-place.
-    input = _mm_load_si128((__m128i*)inputPtr);
-    // Do the four shifts
-    byte1 = _mm_slli_epi32(input, 24);
-    byte2 = _mm_slli_epi32(input, 8);
-    byte3 = _mm_srli_epi32(input, 8);
-    byte4 = _mm_srli_epi32(input, 24);
-    // Or bytes together
-    output = _mm_or_si128(byte1, byte4);
-    byte2 = _mm_and_si128(byte2, byte2mask);
-    output = _mm_or_si128(output, byte2);
-    byte3 = _mm_and_si128(byte3, byte3mask);
-    output = _mm_or_si128(output, byte3);
-    // Store the results
-    _mm_store_si128((__m128i*)inputPtr, output);
-    inputPtr += 4;
-  }
-
-  // Byteswap any remaining points:
-  number = quarterPoints*4;
-  for(; number < num_points; number++){
-    uint32_t outputVal = *inputPtr;
-    outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) 
| ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
-    *inputPtr = outputVal;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int32_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned 
int num_points){
-  uint32_t* inputPtr = intsToSwap;
-
-  unsigned int point;
-  for(point = 0; point < num_points; point++){
-    uint32_t output = *inputPtr;
-    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | 
((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));
-
-    *inputPtr = output;
-    inputPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_32u_byteswap_a_H */
diff --git a/volk/kernels/volk/volk_32u_byteswappuppet_32u.h 
b/volk/kernels/volk/volk_32u_byteswappuppet_32u.h
deleted file mode 100644
index bf7055e..0000000
--- a/volk/kernels/volk/volk_32u_byteswappuppet_32u.h
+++ /dev/null
@@ -1,45 +0,0 @@
-#ifndef INCLUDED_volk_32u_byteswappuppet_32u_H
-#define INCLUDED_volk_32u_byteswappuppet_32u_H
-
-
-#include <stdint.h>
-#include <volk/volk_32u_byteswap.h>
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_32u_byteswappuppet_32u_generic(uint32_t*output, 
uint32_t* intsToSwap, unsigned int num_points){
-
-    volk_32u_byteswap_generic((uint32_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_NEON
-static inline void volk_32u_byteswappuppet_32u_neon(uint32_t*output, uint32_t* 
intsToSwap, unsigned int num_points){
-
-    volk_32u_byteswap_neon((uint32_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_SSE2
-static inline void volk_32u_byteswappuppet_32u_u_sse2(uint32_t *output, 
uint32_t* intsToSwap, unsigned int num_points){
-
-    volk_32u_byteswap_u_sse2((uint32_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_SSE2
-static inline void volk_32u_byteswappuppet_32u_a_sse2(uint32_t* output, 
uint32_t* intsToSwap, unsigned int num_points){
-
-    volk_32u_byteswap_a_sse2((uint32_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint32_t));
-
-}
-#endif
-
-
-#endif
diff --git a/volk/kernels/volk/volk_32u_popcnt.h 
b/volk/kernels/volk/volk_32u_popcnt.h
deleted file mode 100644
index 8d61375..0000000
--- a/volk/kernels/volk/volk_32u_popcnt.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_32u_POPCNT_A16_H
-#define INCLUDED_VOLK_32u_POPCNT_A16_H
-
-#include <stdio.h>
-#include <inttypes.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-static inline void volk_32u_popcnt_generic(uint32_t* ret, const uint32_t 
value) {
-
-  // This is faster than a lookup table
-  uint32_t retVal = value;
-
-  retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
-  retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
-  retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
-  retVal = (retVal + (retVal >> 8));
-  retVal = (retVal + (retVal >> 16)) & 0x0000003F;
-
-  *ret = retVal;
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-#ifdef LV_HAVE_SSE4_2
-
-#include <nmmintrin.h>
-
-static inline void volk_32u_popcnt_a_sse4_2(uint32_t* ret, const uint32_t 
value) {
-  *ret = _mm_popcnt_u32(value);
-}
-
-#endif /*LV_HAVE_SSE4_2*/
-
-#endif /*INCLUDED_VOLK_32u_POPCNT_A16_H*/
diff --git a/volk/kernels/volk/volk_32u_popcntpuppet_32u.h 
b/volk/kernels/volk/volk_32u_popcntpuppet_32u.h
deleted file mode 100644
index d5edd35..0000000
--- a/volk/kernels/volk/volk_32u_popcntpuppet_32u.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_32u_popcntpuppet_32u_H
-#define INCLUDED_volk_32u_popcntpuppet_32u_H
-
-#include <stdint.h>
-#include <volk/volk_32u_popcnt.h>
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_32u_popcntpuppet_32u_generic(uint32_t* outVector, 
const uint32_t* inVector, unsigned int num_points){
-    unsigned int ii;
-    for(ii=0; ii < num_points; ++ii) {
-        volk_32u_popcnt_generic(outVector+ii, *(inVector+ii) );
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_SSE4_2
-static inline void volk_32u_popcntpuppet_32u_a_sse4_2(uint32_t* outVector, 
const uint32_t* inVector, unsigned int num_points){
-    unsigned int ii;
-    for(ii=0; ii < num_points; ++ii) {
-        volk_32u_popcnt_a_sse4_2(outVector+ii, *(inVector+ii) );
-    }
-}
-#endif /* LV_HAVE_SSE4_2 */
-
-#endif /* INCLUDED_volk_32fc_s32fc_rotatorpuppet_32fc_a_H */
diff --git a/volk/kernels/volk/volk_64f_convert_32f.h 
b/volk/kernels/volk/volk_64f_convert_32f.h
deleted file mode 100644
index 39a7ae8..0000000
--- a/volk/kernels/volk/volk_64f_convert_32f.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_64f_convert_32f_u_H
-#define INCLUDED_volk_64f_convert_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Converts the double values into float values
-    \param dVector The converted float vector values
-    \param fVector The double vector values to be converted
-    \param num_points The number of points in the two vectors to be converted
-  */
-static inline void volk_64f_convert_32f_u_sse2(float* outputVector, const 
double* inputVector, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const double* inputVectorPtr = (const double*)inputVector;
-  float* outputVectorPtr = outputVector;
-  __m128 ret, ret2;
-  __m128d inputVal1, inputVal2;
-
-  for(;number < quarterPoints; number++){
-    inputVal1 = _mm_loadu_pd(inputVectorPtr); inputVectorPtr += 2;
-    inputVal2 = _mm_loadu_pd(inputVectorPtr); inputVectorPtr += 2;
-
-    ret = _mm_cvtpd_ps(inputVal1);
-    ret2 = _mm_cvtpd_ps(inputVal2);
-
-    ret = _mm_movelh_ps(ret, ret2);
-
-    _mm_storeu_ps(outputVectorPtr, ret);
-    outputVectorPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    outputVector[number] = (float)(inputVector[number]);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Converts the double values into float values
-  \param dVector The converted float vector values
-  \param fVector The double vector values to be converted
-  \param num_points The number of points in the two vectors to be converted
-*/
-static inline void volk_64f_convert_32f_generic(float* outputVector, const 
double* inputVector, unsigned int num_points){
-  float* outputVectorPtr = outputVector;
-  const double* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((float)(*inputVectorPtr++));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_64f_convert_32f_u_H */
-#ifndef INCLUDED_volk_64f_convert_32f_a_H
-#define INCLUDED_volk_64f_convert_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-  /*!
-    \brief Converts the double values into float values
-    \param dVector The converted float vector values
-    \param fVector The double vector values to be converted
-    \param num_points The number of points in the two vectors to be converted
-  */
-static inline void volk_64f_convert_32f_a_sse2(float* outputVector, const 
double* inputVector, unsigned int num_points){
-  unsigned int number = 0;
-
-  const unsigned int quarterPoints = num_points / 4;
-
-  const double* inputVectorPtr = (const double*)inputVector;
-  float* outputVectorPtr = outputVector;
-  __m128 ret, ret2;
-  __m128d inputVal1, inputVal2;
-
-  for(;number < quarterPoints; number++){
-    inputVal1 = _mm_load_pd(inputVectorPtr); inputVectorPtr += 2;
-    inputVal2 = _mm_load_pd(inputVectorPtr); inputVectorPtr += 2;
-
-    ret = _mm_cvtpd_ps(inputVal1);
-    ret2 = _mm_cvtpd_ps(inputVal2);
-
-    ret = _mm_movelh_ps(ret, ret2);
-
-    _mm_store_ps(outputVectorPtr, ret);
-    outputVectorPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    outputVector[number] = (float)(inputVector[number]);
-  }
-}
-#endif /* LV_HAVE_SSE2 */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Converts the double values into float values
-  \param dVector The converted float vector values
-  \param fVector The double vector values to be converted
-  \param num_points The number of points in the two vectors to be converted
-*/
-static inline void volk_64f_convert_32f_a_generic(float* outputVector, const 
double* inputVector, unsigned int num_points){
-  float* outputVectorPtr = outputVector;
-  const double* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((float)(*inputVectorPtr++));
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_64f_convert_32f_a_H */
diff --git a/volk/kernels/volk/volk_64f_x2_max_64f.h 
b/volk/kernels/volk/volk_64f_x2_max_64f.h
deleted file mode 100644
index 7bf09e8..0000000
--- a/volk/kernels/volk/volk_64f_x2_max_64f.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_64f_x2_max_64f_a_H
-#define INCLUDED_volk_64f_x2_max_64f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Selects maximum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_64f_x2_max_64f_a_sse2(double* cVector, const double* 
aVector, const double* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    double* cPtr = cVector;
-    const double* aPtr = aVector;
-    const double* bPtr=  bVector;
-
-    __m128d aVal, bVal, cVal;
-    for(;number < halfPoints; number++){
-
-      aVal = _mm_load_pd(aPtr);
-      bVal = _mm_load_pd(bPtr);
-
-      cVal = _mm_max_pd(aVal, bVal);
-
-      _mm_store_pd(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 2;
-      bPtr += 2;
-      cPtr += 2;
-    }
-
-    number = halfPoints * 2;
-    for(;number < num_points; number++){
-      const double a = *aPtr++;
-      const double b = *bPtr++;
-      *cPtr++ = ( a > b ? a : b);
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Selects maximum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_64f_x2_max_64f_generic(double* cVector, const double* 
aVector, const double* bVector, unsigned int num_points){
-    double* cPtr = cVector;
-    const double* aPtr = aVector;
-    const double* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      const double a = *aPtr++;
-      const double b = *bPtr++;
-      *cPtr++ = ( a > b ? a : b);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
diff --git a/volk/kernels/volk/volk_64f_x2_min_64f.h 
b/volk/kernels/volk/volk_64f_x2_min_64f.h
deleted file mode 100644
index 8d8b377..0000000
--- a/volk/kernels/volk/volk_64f_x2_min_64f.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_64f_x2_min_64f_a_H
-#define INCLUDED_volk_64f_x2_min_64f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-/*!
-  \brief Selects minimum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_64f_x2_min_64f_a_sse2(double* cVector, const double* 
aVector, const double* bVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int halfPoints = num_points / 2;
-
-    double* cPtr = cVector;
-    const double* aPtr = aVector;
-    const double* bPtr=  bVector;
-
-    __m128d aVal, bVal, cVal;
-    for(;number < halfPoints; number++){
-
-      aVal = _mm_load_pd(aPtr);
-      bVal = _mm_load_pd(bPtr);
-
-      cVal = _mm_min_pd(aVal, bVal);
-
-      _mm_store_pd(cPtr,cVal); // Store the results back into the C container
-
-      aPtr += 2;
-      bPtr += 2;
-      cPtr += 2;
-    }
-
-    number = halfPoints * 2;
-    for(;number < num_points; number++){
-      const double a = *aPtr++;
-      const double b = *bPtr++;
-      *cPtr++ = ( a < b ? a : b);
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Selects minimum value from each entry between bVector and aVector and 
store their results in the cVector
-  \param cVector The vector where the results will be stored
-  \param aVector The vector to be checked
-  \param bVector The vector to be checked
-  \param num_points The number of values in aVector and bVector to be checked 
and stored into cVector
-*/
-static inline void volk_64f_x2_min_64f_generic(double* cVector, const double* 
aVector, const double* bVector, unsigned int num_points){
-    double* cPtr = cVector;
-    const double* aPtr = aVector;
-    const double* bPtr=  bVector;
-    unsigned int number = 0;
-
-    for(number = 0; number < num_points; number++){
-      const double a = *aPtr++;
-      const double b = *bPtr++;
-      *cPtr++ = ( a < b ? a : b);
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-#endif /* INCLUDED_volk_64f_x2_min_64f_a_H */
diff --git a/volk/kernels/volk/volk_64u_byteswap.h 
b/volk/kernels/volk/volk_64u_byteswap.h
deleted file mode 100644
index dce8832..0000000
--- a/volk/kernels/volk/volk_64u_byteswap.h
+++ /dev/null
@@ -1,253 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_64u_byteswap_u_H
-#define INCLUDED_volk_64u_byteswap_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int64_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_64u_byteswap_u_sse2(uint64_t* intsToSwap, unsigned int 
num_points){
-    uint32_t* inputPtr = (uint32_t*)intsToSwap;
-    __m128i input, byte1, byte2, byte3, byte4, output;
-    __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
-    __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
-    uint64_t number = 0;
-    const unsigned int halfPoints = num_points / 2;
-    for(;number < halfPoints; number++){
-      // Load the 32t values, increment inputPtr later since we're doing it 
in-place.
-      input = _mm_loadu_si128((__m128i*)inputPtr);
-
-      // Do the four shifts
-      byte1 = _mm_slli_epi32(input, 24);
-      byte2 = _mm_slli_epi32(input, 8);
-      byte3 = _mm_srli_epi32(input, 8);
-      byte4 = _mm_srli_epi32(input, 24);
-      // Or bytes together
-      output = _mm_or_si128(byte1, byte4);
-      byte2 = _mm_and_si128(byte2, byte2mask);
-      output = _mm_or_si128(output, byte2);
-      byte3 = _mm_and_si128(byte3, byte3mask);
-      output = _mm_or_si128(output, byte3);
-
-      // Reorder the two words
-      output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));
-
-      // Store the results
-      _mm_storeu_si128((__m128i*)inputPtr, output);
-      inputPtr += 4;
-    }
-
-    // Byteswap any remaining points:
-    number = halfPoints*2;
-    for(; number < num_points; number++){
-      uint32_t output1 = *inputPtr;
-      uint32_t output2 = inputPtr[1];
-
-      output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) | 
((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
-
-      output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) | 
((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
-
-      *inputPtr++ = output2;
-      *inputPtr++ = output1;
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int64_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_64u_byteswap_generic(uint64_t* intsToSwap, unsigned 
int num_points){
-  uint32_t* inputPtr = (uint32_t*)intsToSwap;
-  unsigned int point;
-  for(point = 0; point < num_points; point++){
-    uint32_t output1 = *inputPtr;
-    uint32_t output2 = inputPtr[1];
-
-    output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) | 
((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
-
-    output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) | 
((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
-
-    *inputPtr++ = output2;
-    *inputPtr++ = output1;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Byteswaps (in-place) a vector of int64_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_64u_byteswap_neon(uint64_t* intsToSwap, unsigned int 
num_points){
-  uint32_t* inputPtr = (uint32_t*)intsToSwap;
-  unsigned int number = 0;
-  unsigned int n8points = num_points / 4;
-
-  uint8x8x4_t input_table;
-  uint8x8_t int_lookup01, int_lookup23, int_lookup45, int_lookup67;
-  uint8x8_t swapped_int01, swapped_int23, swapped_int45, swapped_int67;
-
-  /* these magic numbers are used as byte-indeces in the LUT.
-     they are pre-computed to save time. A simple C program
-     can calculate them; for example for lookup01:
-    uint8_t chars[8] = {24, 16, 8, 0, 25, 17, 9, 1};
-    for(ii=0; ii < 8; ++ii) {
-        index += ((uint64_t)(*(chars+ii))) << (ii*8);
-    }
-  */
-  int_lookup01 = vcreate_u8(2269495096316185);
-  int_lookup23 = vcreate_u8(146949840772469531);
-  int_lookup45 = vcreate_u8(291630186448622877);
-  int_lookup67 = vcreate_u8(436310532124776223);
-
-  for(number = 0; number < n8points; ++number){
-    input_table = vld4_u8((uint8_t*) inputPtr);
-    swapped_int01 = vtbl4_u8(input_table, int_lookup01);
-    swapped_int23 = vtbl4_u8(input_table, int_lookup23);
-    swapped_int45 = vtbl4_u8(input_table, int_lookup45);
-    swapped_int67 = vtbl4_u8(input_table, int_lookup67);
-    vst1_u8((uint8_t*) inputPtr, swapped_int01);
-    vst1_u8((uint8_t*) (inputPtr+2), swapped_int23);
-    vst1_u8((uint8_t*) (inputPtr+4), swapped_int45);
-    vst1_u8((uint8_t*) (inputPtr+6), swapped_int67);
-
-    inputPtr += 4;
-  }
-
-  for(number = n8points * 4; number < num_points; ++number){
-    uint32_t output1 = *inputPtr;
-    uint32_t output2 = inputPtr[1];
-
-    output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) | 
((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
-    output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) | 
((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
-
-    *inputPtr++ = output2;
-    *inputPtr++ = output1;
-  }
-
-}
-#endif /* LV_HAVE_NEON */
-
-
-#endif /* INCLUDED_volk_64u_byteswap_u_H */
-#ifndef INCLUDED_volk_64u_byteswap_a_H
-#define INCLUDED_volk_64u_byteswap_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE2
-#include <emmintrin.h>
-
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int64_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int 
num_points){
-    uint32_t* inputPtr = (uint32_t*)intsToSwap;
-    __m128i input, byte1, byte2, byte3, byte4, output;
-    __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
-    __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
-    uint64_t number = 0;
-    const unsigned int halfPoints = num_points / 2;
-    for(;number < halfPoints; number++){
-      // Load the 32t values, increment inputPtr later since we're doing it 
in-place.
-      input = _mm_load_si128((__m128i*)inputPtr);
-
-      // Do the four shifts
-      byte1 = _mm_slli_epi32(input, 24);
-      byte2 = _mm_slli_epi32(input, 8);
-      byte3 = _mm_srli_epi32(input, 8);
-      byte4 = _mm_srli_epi32(input, 24);
-      // Or bytes together
-      output = _mm_or_si128(byte1, byte4);
-      byte2 = _mm_and_si128(byte2, byte2mask);
-      output = _mm_or_si128(output, byte2);
-      byte3 = _mm_and_si128(byte3, byte3mask);
-      output = _mm_or_si128(output, byte3);
-
-      // Reorder the two words
-      output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));
-
-      // Store the results
-      _mm_store_si128((__m128i*)inputPtr, output);
-      inputPtr += 4;
-    }
-
-    // Byteswap any remaining points:
-    number = halfPoints*2;
-    for(; number < num_points; number++){
-      uint32_t output1 = *inputPtr;
-      uint32_t output2 = inputPtr[1];
-
-      output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) | 
((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
-
-      output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) | 
((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
-
-      *inputPtr++ = output2;
-      *inputPtr++ = output1;
-    }
-}
-#endif /* LV_HAVE_SSE2 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Byteswaps (in-place) an aligned vector of int64_t's.
-  \param intsToSwap The vector of data to byte swap
-  \param numDataPoints The number of data points
-*/
-static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned 
int num_points){
-  uint32_t* inputPtr = (uint32_t*)intsToSwap;
-  unsigned int point;
-  for(point = 0; point < num_points; point++){
-    uint32_t output1 = *inputPtr;
-    uint32_t output2 = inputPtr[1];
-
-    output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) | 
((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
-
-    output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) | 
((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
-
-    *inputPtr++ = output2;
-    *inputPtr++ = output1;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_64u_byteswap_a_H */
diff --git a/volk/kernels/volk/volk_64u_byteswappuppet_64u.h 
b/volk/kernels/volk/volk_64u_byteswappuppet_64u.h
deleted file mode 100644
index 6733c58..0000000
--- a/volk/kernels/volk/volk_64u_byteswappuppet_64u.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef INCLUDED_volk_64u_byteswappuppet_64u_H
-#define INCLUDED_volk_64u_byteswappuppet_64u_H
-
-
-#include <stdint.h>
-#include <volk/volk_64u_byteswap.h>
-#include <string.h>
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_64u_byteswappuppet_64u_generic(uint64_t*output, 
uint64_t* intsToSwap, unsigned int num_points){
-
-    volk_64u_byteswap_generic((uint64_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint64_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_NEON
-static inline void volk_64u_byteswappuppet_64u_neon(uint64_t*output, uint64_t* 
intsToSwap, unsigned int num_points){
-
-    volk_64u_byteswap_neon((uint64_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint64_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_SSE2
-static inline void volk_64u_byteswappuppet_64u_u_sse2(uint64_t* output, 
uint64_t* intsToSwap, unsigned int num_points){
-
-    volk_64u_byteswap_u_sse2((uint64_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint64_t));
-
-}
-#endif
-
-#ifdef LV_HAVE_SSE2
-static inline void volk_64u_byteswappuppet_64u_a_sse2(uint64_t* output, 
uint64_t* intsToSwap, unsigned int num_points){
-
-    volk_64u_byteswap_a_sse2((uint64_t*)intsToSwap, num_points);
-    memcpy((void*)output, (void*)intsToSwap, num_points * sizeof(uint64_t));
-
-}
-#endif
-
-
-#endif
diff --git a/volk/kernels/volk/volk_64u_popcnt.h 
b/volk/kernels/volk/volk_64u_popcnt.h
deleted file mode 100644
index 0ec72e3..0000000
--- a/volk/kernels/volk/volk_64u_popcnt.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_64u_popcnt_a_H
-#define INCLUDED_volk_64u_popcnt_a_H
-
-#include <stdio.h>
-#include <inttypes.h>
-
-
-#ifdef LV_HAVE_GENERIC
-
-
-static inline void volk_64u_popcnt_generic(uint64_t* ret, const uint64_t 
value) {
-
-  //const uint32_t* valueVector = (const uint32_t*)&value;
-
-  // This is faster than a lookup table
-  //uint32_t retVal = valueVector[0];
-  uint32_t retVal = (uint32_t)(value & 0x00000000FFFFFFFF);
-
-  retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
-  retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
-  retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
-  retVal = (retVal + (retVal >> 8));
-  retVal = (retVal + (retVal >> 16)) & 0x0000003F;
-  uint64_t retVal64  = retVal;
-
-  //retVal = valueVector[1];
-  retVal = (uint32_t)((value & 0xFFFFFFFF00000000) >> 31);
-  retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
-  retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
-  retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
-  retVal = (retVal + (retVal >> 8));
-  retVal = (retVal + (retVal >> 16)) & 0x0000003F;
-  retVal64 += retVal;
-
-  *ret = retVal64;
-
-}
-
-#endif /*LV_HAVE_GENERIC*/
-
-#if LV_HAVE_SSE4_2 && LV_HAVE_64
-
-#include <nmmintrin.h>
-
-static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret, const uint64_t 
value) {
-  *ret = _mm_popcnt_u64(value);
-
-}
-
-#endif /*LV_HAVE_SSE4_2*/
-
-#if LV_HAVE_NEON
-#include <arm_neon.h>
-static inline void volk_64u_popcnt_neon(uint64_t* ret, const uint64_t value) {
-    uint8x8_t input_val, count8x8_val;
-    uint16x4_t count16x4_val;
-    uint32x2_t count32x2_val;
-    uint64x1_t count64x1_val;
-
-    input_val = vld1_u8((unsigned char *) &value);
-    count8x8_val = vcnt_u8(input_val);
-    count16x4_val = vpaddl_u8(count8x8_val);
-    count32x2_val = vpaddl_u16(count16x4_val);
-    count64x1_val = vpaddl_u32(count32x2_val);
-    vst1_u64(ret, count64x1_val);
-
-    //*ret = _mm_popcnt_u64(value);
-
-}
-#endif /*LV_HAVE_NEON*/
-
-#endif /*INCLUDED_volk_64u_popcnt_a_H*/
diff --git a/volk/kernels/volk/volk_64u_popcntpuppet_64u.h 
b/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
deleted file mode 100644
index c2be422..0000000
--- a/volk/kernels/volk/volk_64u_popcntpuppet_64u.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_64u_popcntpuppet_64u_H
-#define INCLUDED_volk_64u_popcntpuppet_64u_H
-
-#include <stdint.h>
-#include <volk/volk_64u_popcnt.h>
-
-#ifdef LV_HAVE_GENERIC
-static inline void volk_64u_popcntpuppet_64u_generic(uint64_t* outVector, 
const uint64_t* inVector, unsigned int num_points){
-    unsigned int ii;
-    for(ii=0; ii < num_points; ++ii) {
-        volk_64u_popcnt_generic(outVector+ii, num_points );
-
-    }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#if LV_HAVE_SSE4_2 && LV_HAVE_64
-static inline void volk_64u_popcntpuppet_64u_a_sse4_2(uint64_t* outVector, 
const uint64_t* inVector, unsigned int num_points){
-    unsigned int ii;
-    for(ii=0; ii < num_points; ++ii) {
-        volk_64u_popcnt_a_sse4_2(outVector+ii, num_points );
-
-    }
-}
-#endif /* LV_HAVE_SSE4_2 */
-
-#ifdef LV_HAVE_NEON
-static inline void volk_64u_popcntpuppet_64u_neon(uint64_t* outVector, const 
uint64_t* inVector, unsigned int num_points){
-    unsigned int ii;
-    for(ii=0; ii < num_points; ++ii) {
-        volk_64u_popcnt_neon(outVector+ii, num_points );
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#endif /* INCLUDED_volk_32fc_s32fc_rotatorpuppet_32fc_a_H */
diff --git a/volk/kernels/volk/volk_8i_convert_16i.h 
b/volk/kernels/volk/volk_8i_convert_16i.h
deleted file mode 100644
index 549ba4e..0000000
--- a/volk/kernels/volk/volk_8i_convert_16i.h
+++ /dev/null
@@ -1,217 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8i_convert_16i_u_H
-#define INCLUDED_volk_8i_convert_16i_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-  /*!
-    \brief Converts the input 8 bit integer data into 16 bit integer data
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param num_points The number of data values to be converted
-    \note Input and output buffers do NOT need to be properly aligned
-  */
-static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector, const 
int8_t* inputVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int sixteenthPoints = num_points / 16;
-
-    const __m128i* inputVectorPtr = (const __m128i*)inputVector;
-    __m128i* outputVectorPtr = (__m128i*)outputVector;
-    __m128i inputVal;
-    __m128i ret;
-
-    for(;number < sixteenthPoints; number++){
-      inputVal = _mm_loadu_si128(inputVectorPtr);
-      ret = _mm_cvtepi8_epi16(inputVal);
-      ret = _mm_slli_epi16(ret, 8); // Multiply by 256
-      _mm_storeu_si128(outputVectorPtr, ret);
-
-      outputVectorPtr++;
-
-      inputVal = _mm_srli_si128(inputVal, 8);
-      ret = _mm_cvtepi8_epi16(inputVal);
-      ret = _mm_slli_epi16(ret, 8); // Multiply by 256
-      _mm_storeu_si128(outputVectorPtr, ret);
-
-      outputVectorPtr++;
-
-      inputVectorPtr++;
-    }
-
-    number = sixteenthPoints * 16;
-    for(; number < num_points; number++){
-      outputVector[number] = (int16_t)(inputVector[number])*256;
-    }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the input 8 bit integer data into 16 bit integer data
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param num_points The number of data values to be converted
-    \note Input and output buffers do NOT need to be properly aligned
-  */
-static inline void volk_8i_convert_16i_generic(int16_t* outputVector, const 
int8_t* inputVector, unsigned int num_points){
-  int16_t* outputVectorPtr = outputVector;
-  const int8_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_VOLK_8s_CONVERT_16s_UNALIGNED8_H */
-#ifndef INCLUDED_volk_8i_convert_16i_a_H
-#define INCLUDED_volk_8i_convert_16i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-  /*!
-    \brief Converts the input 8 bit integer data into 16 bit integer data
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector, const 
int8_t* inputVector, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int sixteenthPoints = num_points / 16;
-
-    const __m128i* inputVectorPtr = (const __m128i*)inputVector;
-    __m128i* outputVectorPtr = (__m128i*)outputVector;
-    __m128i inputVal;
-    __m128i ret;
-
-    for(;number < sixteenthPoints; number++){
-      inputVal = _mm_load_si128(inputVectorPtr);
-      ret = _mm_cvtepi8_epi16(inputVal);
-      ret = _mm_slli_epi16(ret, 8); // Multiply by 256
-      _mm_store_si128(outputVectorPtr, ret);
-
-      outputVectorPtr++;
-
-      inputVal = _mm_srli_si128(inputVal, 8);
-      ret = _mm_cvtepi8_epi16(inputVal);
-      ret = _mm_slli_epi16(ret, 8); // Multiply by 256
-      _mm_store_si128(outputVectorPtr, ret);
-
-      outputVectorPtr++;
-
-      inputVectorPtr++;
-    }
-
-    number = sixteenthPoints * 16;
-    for(; number < num_points; number++){
-      outputVector[number] = (int16_t)(inputVector[number])*256;
-    }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the input 8 bit integer data into 16 bit integer data
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_8i_convert_16i_a_generic(int16_t* outputVector, const 
int8_t* inputVector, unsigned int num_points){
-  int16_t* outputVectorPtr = outputVector;
-  const int8_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-
-  /*!
-    \brief Converts the input 8 bit integer data into 16 bit integer data
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param num_points The number of data values to be converted
-    \note Input and output buffers do NOT need to be properly aligned
-  */
-static inline void volk_8i_convert_16i_neon(int16_t* outputVector, const 
int8_t* inputVector, unsigned int num_points){
-    int16_t* outputVectorPtr = outputVector;
-    const int8_t* inputVectorPtr = inputVector;
-    unsigned int number;
-    const unsigned int eighth_points = num_points / 8;
-
-    int8x8_t input_vec ;
-    int16x8_t converted_vec;
-
-    // NEON doesn't have a concept of 8 bit registers, so we are really
-    // dealing with the low half of 16-bit registers. Since this requires
-    // a move instruction we likely do better with ASM here.
-    for(number = 0; number < eighth_points; ++number) {
-        input_vec = vld1_s8(inputVectorPtr);
-        converted_vec = vmovl_s8(input_vec);
-        //converted_vec = vmulq_s16(converted_vec, scale_factor);
-        converted_vec = vshlq_n_s16(converted_vec, 8);
-        vst1q_s16( outputVectorPtr, converted_vec);
-
-        inputVectorPtr += 8;
-        outputVectorPtr += 8;
-    }
-
-    for(number = eighth_points * 8; number < num_points; number++){
-      *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-#ifdef LV_HAVE_ORC
-  /*!
-    \brief Converts the input 8 bit integer data into 16 bit integer data
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The 16 bit output data buffer
-    \param num_points The number of data values to be converted
-  */
-extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector, const 
int8_t* inputVector, unsigned int num_points);
-static inline void volk_8i_convert_16i_u_orc(int16_t* outputVector, const 
int8_t* inputVector, unsigned int num_points){
-    volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-
-#endif /* INCLUDED_VOLK_8s_CONVERT_16s_ALIGNED8_H */
diff --git a/volk/kernels/volk/volk_8i_s32f_convert_32f.h 
b/volk/kernels/volk/volk_8i_s32f_convert_32f.h
deleted file mode 100644
index 5e959c8..0000000
--- a/volk/kernels/volk/volk_8i_s32f_convert_32f.h
+++ /dev/null
@@ -1,222 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H
-#define INCLUDED_volk_8i_s32f_convert_32f_u_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-  /*!
-    \brief Converts the input 8 bit integer data into floating point data, and 
divides the each floating point output data point by the scalar value
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-    \note Output buffer does NOT need to be properly aligned
-  */
-static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, 
const int8_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int sixteenthPoints = num_points / 16;
-
-    float* outputVectorPtr = outputVector;
-    const float iScalar = 1.0 / scalar;
-    __m128 invScalar = _mm_set_ps1( iScalar );
-    const int8_t* inputVectorPtr = inputVector;
-    __m128 ret;
-    __m128i inputVal;
-    __m128i interimVal;
-
-    for(;number < sixteenthPoints; number++){
-      inputVal = _mm_loadu_si128((__m128i*)inputVectorPtr);
-
-      interimVal = _mm_cvtepi8_epi32(inputVal);
-      ret = _mm_cvtepi32_ps(interimVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      inputVal = _mm_srli_si128(inputVal, 4);
-      interimVal = _mm_cvtepi8_epi32(inputVal);
-      ret = _mm_cvtepi32_ps(interimVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      inputVal = _mm_srli_si128(inputVal, 4);
-      interimVal = _mm_cvtepi8_epi32(inputVal);
-      ret = _mm_cvtepi32_ps(interimVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      inputVal = _mm_srli_si128(inputVal, 4);
-      interimVal = _mm_cvtepi8_epi32(inputVal);
-      ret = _mm_cvtepi32_ps(interimVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_storeu_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      inputVectorPtr += 16;
-    }
-
-    number = sixteenthPoints * 16;
-    for(; number < num_points; number++){
-      outputVector[number] = (float)(inputVector[number]) * iScalar;
-    }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the input 8 bit integer data into floating point data, and 
divides the each floating point output data point by the scalar value
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-    \note Output buffer does NOT need to be properly aligned
-  */
-static inline void volk_8i_s32f_convert_32f_generic(float* outputVector, const 
int8_t* inputVector, const float scalar, unsigned int num_points){
-  float* outputVectorPtr = outputVector;
-  const int8_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  const float iScalar = 1.0 / scalar;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_VOLK_8s_CONVERT_32f_UNALIGNED8_H */
-#ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H
-#define INCLUDED_volk_8i_s32f_convert_32f_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-
-  /*!
-    \brief Converts the input 8 bit integer data into floating point data, and 
divides the each floating point output data point by the scalar value
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_8i_s32f_convert_32f_a_sse4_1(float* outputVector, 
const int8_t* inputVector, const float scalar, unsigned int num_points){
-    unsigned int number = 0;
-    const unsigned int sixteenthPoints = num_points / 16;
-
-    float* outputVectorPtr = outputVector;
-    const float iScalar = 1.0 / scalar;
-    __m128 invScalar = _mm_set_ps1(iScalar);
-    const int8_t* inputVectorPtr = inputVector;
-    __m128 ret;
-    __m128i inputVal;
-    __m128i interimVal;
-
-    for(;number < sixteenthPoints; number++){
-      inputVal = _mm_load_si128((__m128i*)inputVectorPtr);
-
-      interimVal = _mm_cvtepi8_epi32(inputVal);
-      ret = _mm_cvtepi32_ps(interimVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_store_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      inputVal = _mm_srli_si128(inputVal, 4);
-      interimVal = _mm_cvtepi8_epi32(inputVal);
-      ret = _mm_cvtepi32_ps(interimVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_store_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      inputVal = _mm_srli_si128(inputVal, 4);
-      interimVal = _mm_cvtepi8_epi32(inputVal);
-      ret = _mm_cvtepi32_ps(interimVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_store_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      inputVal = _mm_srli_si128(inputVal, 4);
-      interimVal = _mm_cvtepi8_epi32(inputVal);
-      ret = _mm_cvtepi32_ps(interimVal);
-      ret = _mm_mul_ps(ret, invScalar);
-      _mm_store_ps(outputVectorPtr, ret);
-      outputVectorPtr += 4;
-
-      inputVectorPtr += 16;
-    }
-
-    number = sixteenthPoints * 16;
-    for(; number < num_points; number++){
-      outputVector[number] = (float)(inputVector[number]) * iScalar;
-    }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_GENERIC
-  /*!
-    \brief Converts the input 8 bit integer data into floating point data, and 
divides the each floating point output data point by the scalar value
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-static inline void volk_8i_s32f_convert_32f_a_generic(float* outputVector, 
const int8_t* inputVector, const float scalar, unsigned int num_points){
-  float* outputVectorPtr = outputVector;
-  const int8_t* inputVectorPtr = inputVector;
-  unsigned int number = 0;
-  const float iScalar = 1.0 / scalar;
-
-  for(number = 0; number < num_points; number++){
-    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_ORC
-  /*!
-    \brief Converts the input 8 bit integer data into floating point data, and 
divides the each floating point output data point by the scalar value
-    \param inputVector The 8 bit input data buffer
-    \param outputVector The floating point output data buffer
-    \param scalar The value divided against each point in the output buffer
-    \param num_points The number of data values to be converted
-  */
-extern void volk_8i_s32f_convert_32f_a_orc_impl(float* outputVector, const 
int8_t* inputVector, const float scalar, unsigned int num_points);
-static inline void volk_8i_s32f_convert_32f_u_orc(float* outputVector, const 
int8_t* inputVector, const float scalar, unsigned int num_points){
-    float invscalar = 1.0 / scalar;
-    volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, 
num_points);
-}
-#endif /* LV_HAVE_ORC */
-
-
-
-#endif /* INCLUDED_VOLK_8s_CONVERT_32f_ALIGNED8_H */
diff --git a/volk/kernels/volk/volk_8ic_deinterleave_16i_x2.h 
b/volk/kernels/volk/volk_8ic_deinterleave_16i_x2.h
deleted file mode 100644
index 818d309..0000000
--- a/volk/kernels/volk/volk_8ic_deinterleave_16i_x2.h
+++ /dev/null
@@ -1,166 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
-#define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-
-static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer, 
int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  int16_t* qBufferPtr = qBuffer;
-  __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 14, 12, 10, 8, 6, 4, 2, 0);  // set 16 byte values
-  __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 15, 13, 11, 9, 7, 5, 3, 1);
-  __m128i complexVal, iOutputVal, qOutputVal;
-
-  unsigned int eighthPoints = num_points / 8;
-
-  for(number = 0; number < eighthPoints; number++){
-    complexVal = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr 
+= 16;   // aligned load
-
-    iOutputVal = _mm_shuffle_epi8(complexVal, iMoveMask);   // shuffle 16 
bytes of 128bit complexVal
-    qOutputVal = _mm_shuffle_epi8(complexVal, qMoveMask);
-
-    iOutputVal = _mm_cvtepi8_epi16(iOutputVal);     // fills 2-byte sign 
extended versions of lower 8 bytes of input to output
-    iOutputVal = _mm_slli_epi16(iOutputVal, 8);     // shift in left by 8 
bits, each of the 8 16-bit integers, shift in with zeros
-
-    qOutputVal = _mm_cvtepi8_epi16(qOutputVal);
-    qOutputVal = _mm_slli_epi16(qOutputVal, 8);
-
-    _mm_store_si128((__m128i*)iBufferPtr, iOutputVal);  // aligned store
-    _mm_store_si128((__m128i*)qBufferPtr, qOutputVal);
-
-    iBufferPtr += 8;
-    qBufferPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;   // load 8 bit 
Complexvector into 16 bit, shift left by 8 bits and store
-    *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
-  }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_deinterleave_16i_x2_a_avx(int16_t* iBuffer, 
int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  int16_t* qBufferPtr = qBuffer;
-  __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 14, 12, 10, 8, 6, 4, 2, 0);  // set 16 byte values
-  __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 15, 13, 11, 9, 7, 5, 3, 1);
-  __m256i complexVal, iOutputVal, qOutputVal;
-  __m128i complexVal1, complexVal0;
-  __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;
-
-  unsigned int sixteenthPoints = num_points / 16;
-
-  for(number = 0; number < sixteenthPoints; number++){
-    complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);  
complexVectorPtr += 32;    // aligned load
-
-    // Extract from complexVal to iOutputVal and qOutputVal
-    complexVal1 = _mm256_extractf128_si256(complexVal, 1);
-    complexVal0 = _mm256_extractf128_si256(complexVal, 0);
-
-    iOutputVal1 = _mm_shuffle_epi8(complexVal1, iMoveMask);     // shuffle 16 
bytes of 128bit complexVal
-    iOutputVal0 = _mm_shuffle_epi8(complexVal0, iMoveMask);
-    qOutputVal1 = _mm_shuffle_epi8(complexVal1, qMoveMask);
-    qOutputVal0 = _mm_shuffle_epi8(complexVal0, qMoveMask);
-
-    iOutputVal1 = _mm_cvtepi8_epi16(iOutputVal1);   // fills 2-byte sign 
extended versions of lower 8 bytes of input to output
-    iOutputVal1 = _mm_slli_epi16(iOutputVal1, 8);   // shift in left by 8 
bits, each of the 8 16-bit integers, shift in with zeros
-    iOutputVal0 = _mm_cvtepi8_epi16(iOutputVal0);
-    iOutputVal0 = _mm_slli_epi16(iOutputVal0, 8);
-
-    qOutputVal1 = _mm_cvtepi8_epi16(qOutputVal1);
-    qOutputVal1 = _mm_slli_epi16(qOutputVal1, 8);
-    qOutputVal0 = _mm_cvtepi8_epi16(qOutputVal0);
-    qOutputVal0 = _mm_slli_epi16(qOutputVal0, 8);
-
-    // Pack iOutputVal0,1 to iOutputVal
-    __m256i dummy = _mm256_setzero_si256();
-    iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
-    iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
-    qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
-    qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);
-
-    _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);   // aligned store
-    _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
-
-    iBufferPtr += 16;
-    qBufferPtr += 16;
-  }
-
-  number = sixteenthPoints * 16;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;   // load 8 bit 
Complexvector into 16 bit, shift left by 8 bits and store
-    *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I & Q 16 bit vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_deinterleave_16i_x2_generic(int16_t* iBuffer, 
int16_t* qBuffer, const lv_8sc_t* complexVector, unsigned int num_points){
-  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  int16_t* qBufferPtr = qBuffer;
-  unsigned int number;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = (int16_t)(*complexVectorPtr++)*256;
-    *qBufferPtr++ = (int16_t)(*complexVectorPtr++)*256;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_8ic_deinterleave_16i_x2_a_H */
diff --git a/volk/kernels/volk/volk_8ic_deinterleave_real_16i.h 
b/volk/kernels/volk/volk_8ic_deinterleave_real_16i.h
deleted file mode 100644
index c22f828..0000000
--- a/volk/kernels/volk/volk_8ic_deinterleave_real_16i.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8ic_deinterleave_real_16i_a_H
-#define INCLUDED_volk_8ic_deinterleave_real_16i_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I 16 bit vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_deinterleave_real_16i_a_sse4_1(int16_t* iBuffer, 
const lv_8sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 14, 12, 10, 8, 6, 4, 2, 0);
-  __m128i complexVal, outputVal;
-
-  unsigned int eighthPoints = num_points / 8;
-
-  for(number = 0; number < eighthPoints; number++){
-    complexVal = _mm_load_si128((__m128i*)complexVectorPtr);  complexVectorPtr 
+= 16;
-
-    complexVal = _mm_shuffle_epi8(complexVal, moveMask);
-
-    outputVal = _mm_cvtepi8_epi16(complexVal);
-    outputVal = _mm_slli_epi16(outputVal, 7);
-
-    _mm_store_si128((__m128i*)iBufferPtr, outputVal);
-    iBufferPtr += 8;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I 16 bit vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_deinterleave_real_16i_a_avx(int16_t* iBuffer, 
const lv_8sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 14, 12, 10, 8, 6, 4, 2, 0);
-  __m256i complexVal, outputVal;
-  __m128i complexVal1, complexVal0, outputVal1, outputVal0;
-
-  unsigned int sixteenthPoints = num_points / 16;
-
-  for(number = 0; number < sixteenthPoints; number++){
-    complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);  
complexVectorPtr += 32;
-
-    complexVal1 = _mm256_extractf128_si256(complexVal, 1);
-    complexVal0 = _mm256_extractf128_si256(complexVal, 0);
-
-    outputVal1 = _mm_shuffle_epi8(complexVal1, moveMask);
-    outputVal0 = _mm_shuffle_epi8(complexVal0, moveMask);
-
-    outputVal1 = _mm_cvtepi8_epi16(outputVal1);
-    outputVal1 = _mm_slli_epi16(outputVal1, 7);
-    outputVal0 = _mm_cvtepi8_epi16(outputVal0);
-    outputVal0 = _mm_slli_epi16(outputVal0, 7);
-
-    __m256i dummy = _mm256_setzero_si256();
-    outputVal = _mm256_insertf128_si256(dummy, outputVal0, 0);
-    outputVal = _mm256_insertf128_si256(outputVal, outputVal1, 1);
-    _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
-
-    iBufferPtr += 16;
-  }
-
-  number = sixteenthPoints * 16;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = ((int16_t)*complexVectorPtr++) * 128;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I 16 bit vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_deinterleave_real_16i_generic(int16_t* iBuffer, 
const lv_8sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
-  int16_t* iBufferPtr = iBuffer;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = ((int16_t)(*complexVectorPtr++)) * 128;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_8ic_deinterleave_real_16i_a_H */
diff --git a/volk/kernels/volk/volk_8ic_deinterleave_real_8i.h 
b/volk/kernels/volk/volk_8ic_deinterleave_real_8i.h
deleted file mode 100644
index e333306..0000000
--- a/volk/kernels/volk/volk_8ic_deinterleave_real_8i.h
+++ /dev/null
@@ -1,175 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
-#define INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H
-
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSSE3
-#include <tmmintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_deinterleave_real_8i_a_ssse3(int8_t* iBuffer, 
const lv_8sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int8_t* iBufferPtr = iBuffer;
-  __m128i moveMask1 = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 14, 12, 10, 8, 6, 4, 2, 0);
-  __m128i moveMask2 = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-  __m128i complexVal1, complexVal2, outputVal;
-
-  unsigned int sixteenthPoints = num_points / 16;
-
-  for(number = 0; number < sixteenthPoints; number++){
-    complexVal1 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 16;
-    complexVal2 = _mm_load_si128((__m128i*)complexVectorPtr);  
complexVectorPtr += 16;
-
-    complexVal1 = _mm_shuffle_epi8(complexVal1, moveMask1);
-    complexVal2 = _mm_shuffle_epi8(complexVal2, moveMask2);
-
-    outputVal = _mm_or_si128(complexVal1, complexVal2);
-
-    _mm_store_si128((__m128i*)iBufferPtr, outputVal);
-    iBufferPtr += 16;
-  }
-
-  number = sixteenthPoints * 16;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_SSSE3 */
-
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-
-static inline void volk_8ic_deinterleave_real_8i_a_avx(int8_t* iBuffer, const 
lv_8sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int8_t* iBufferPtr = iBuffer;
-  __m128i moveMaskL = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 14, 12, 10, 8, 6, 4, 2, 0);
-  __m128i moveMaskH = _mm_set_epi8(14, 12, 10, 8, 6, 4, 2, 0, 0x80, 0x80, 
0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-  __m256i complexVal1, complexVal2, outputVal;
-  __m128i complexVal1H, complexVal1L, complexVal2H, complexVal2L, outputVal1, 
outputVal2;
-
-  unsigned int thirtysecondPoints = num_points / 32;
-
-  for(number = 0; number < thirtysecondPoints; number++){
-
-    complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
-    complexVectorPtr += 32;
-    complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
-    complexVectorPtr += 32;
-
-    complexVal1H = _mm256_extractf128_si256(complexVal1, 1);
-    complexVal1L = _mm256_extractf128_si256(complexVal1, 0);
-    complexVal2H = _mm256_extractf128_si256(complexVal2, 1);
-    complexVal2L = _mm256_extractf128_si256(complexVal2, 0);
-
-    complexVal1H = _mm_shuffle_epi8(complexVal1H, moveMaskH);
-    complexVal1L = _mm_shuffle_epi8(complexVal1L, moveMaskL);
-    outputVal1 = _mm_or_si128(complexVal1H, complexVal1L);
-
-
-    complexVal2H = _mm_shuffle_epi8(complexVal2H, moveMaskH);
-    complexVal2L = _mm_shuffle_epi8(complexVal2L, moveMaskL);
-    outputVal2 = _mm_or_si128(complexVal2H, complexVal2L);
-
-    __m256i dummy = _mm256_setzero_si256();
-    outputVal = _mm256_insertf128_si256(dummy, outputVal1, 0);
-    outputVal = _mm256_insertf128_si256(outputVal, outputVal2, 1);
-
-
-    _mm256_store_si256((__m256i*)iBufferPtr, outputVal);
-    iBufferPtr += 32;
-  }
-
-  number = thirtysecondPoints * 32;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_deinterleave_real_8i_generic(int8_t* iBuffer, 
const lv_8sc_t* complexVector, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (int8_t*)complexVector;
-  int8_t* iBufferPtr = iBuffer;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = *complexVectorPtr++;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-#ifdef LV_HAVE_NEON
-#include <arm_neon.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_deinterleave_real_8i_neon(int8_t* iBuffer, const 
lv_8sc_t* complexVector, unsigned int num_points){
-    unsigned int number;
-    unsigned int sixteenth_points = num_points / 16;
-
-    int8x16x2_t input_vector;
-    for(number=0; number < sixteenth_points; ++number) {
-        input_vector = vld2q_s8((int8_t*) complexVector );
-        vst1q_s8(iBuffer, input_vector.val[0]);
-        iBuffer += 16;
-        complexVector += 16;
-    }
-
-    const int8_t* complexVectorPtr = (int8_t*)complexVector;
-    int8_t* iBufferPtr = iBuffer;
-    for(number = sixteenth_points*16; number < num_points; number++){
-      *iBufferPtr++ = *complexVectorPtr++;
-      complexVectorPtr++;
-    }
-}
-#endif /* LV_HAVE_NEON */
-
-
-
-#endif /* INCLUDED_VOLK_8sc_DEINTERLEAVE_REAL_8s_ALIGNED8_H */
diff --git a/volk/kernels/volk/volk_8ic_s32f_deinterleave_32f_x2.h 
b/volk/kernels/volk/volk_8ic_s32f_deinterleave_32f_x2.h
deleted file mode 100644
index a611548..0000000
--- a/volk/kernels/volk/volk_8ic_s32f_deinterleave_32f_x2.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
-#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I & Q floating point 
vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(float* iBuffer, 
float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int 
num_points){
-  float* iBufferPtr = iBuffer;
-  float* qBufferPtr = qBuffer;
-
-  unsigned int number = 0;
-  const unsigned int eighthPoints = num_points / 8;
-  __m128 iFloatValue, qFloatValue;
-
-  const float iScalar= 1.0 / scalar;
-  __m128 invScalar = _mm_set_ps1(iScalar);
-  __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
-  int8_t* complexVectorPtr = (int8_t*)complexVector;
-
-  __m128i iMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 14, 12, 10, 8, 6, 4, 2, 0);
-  __m128i qMoveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 15, 13, 11, 9, 7, 5, 3, 1);
-
-  for(;number < eighthPoints; number++){
-    complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr 
+= 16;
-    iComplexVal = _mm_shuffle_epi8(complexVal, iMoveMask);
-    qComplexVal = _mm_shuffle_epi8(complexVal, qMoveMask);
-
-    iIntVal = _mm_cvtepi8_epi32(iComplexVal);
-    iFloatValue = _mm_cvtepi32_ps(iIntVal);
-    iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
-    _mm_store_ps(iBufferPtr, iFloatValue);
-    iBufferPtr += 4;
-
-    iComplexVal = _mm_srli_si128(iComplexVal, 4);
-
-    iIntVal = _mm_cvtepi8_epi32(iComplexVal);
-    iFloatValue = _mm_cvtepi32_ps(iIntVal);
-    iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
-    _mm_store_ps(iBufferPtr, iFloatValue);
-    iBufferPtr += 4;
-
-    qIntVal = _mm_cvtepi8_epi32(qComplexVal);
-    qFloatValue = _mm_cvtepi32_ps(qIntVal);
-    qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
-    _mm_store_ps(qBufferPtr, qFloatValue);
-    qBufferPtr += 4;
-
-    qComplexVal = _mm_srli_si128(qComplexVal, 4);
-
-    qIntVal = _mm_cvtepi8_epi32(qComplexVal);
-    qFloatValue = _mm_cvtepi32_ps(qIntVal);
-    qFloatValue = _mm_mul_ps(qFloatValue, invScalar);
-    _mm_store_ps(qBufferPtr, qFloatValue);
-
-    qBufferPtr += 4;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
-    *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
-  }
-
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I & Q floating point 
vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_s32f_deinterleave_32f_x2_a_sse(float* iBuffer, 
float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int 
num_points){
-  float* iBufferPtr = iBuffer;
-  float* qBufferPtr = qBuffer;
-
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 cplxValue1, cplxValue2, iValue, qValue;
-
-  __m128 invScalar = _mm_set_ps1(1.0/scalar);
-  int8_t* complexVectorPtr = (int8_t*)complexVector;
-
-  __VOLK_ATTR_ALIGNED(16) float floatBuffer[8];
-
-  for(;number < quarterPoints; number++){
-    floatBuffer[0] = (float)(complexVectorPtr[0]);
-    floatBuffer[1] = (float)(complexVectorPtr[1]);
-    floatBuffer[2] = (float)(complexVectorPtr[2]);
-    floatBuffer[3] = (float)(complexVectorPtr[3]);
-
-    floatBuffer[4] = (float)(complexVectorPtr[4]);
-    floatBuffer[5] = (float)(complexVectorPtr[5]);
-    floatBuffer[6] = (float)(complexVectorPtr[6]);
-    floatBuffer[7] = (float)(complexVectorPtr[7]);
-
-    cplxValue1 = _mm_load_ps(&floatBuffer[0]);
-    cplxValue2 = _mm_load_ps(&floatBuffer[4]);
-
-    complexVectorPtr += 8;
-
-    cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
-    cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
-
-    // Arrange in i1i2i3i4 format
-    iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2,0,2,0));
-    qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3,1,3,1));
-
-    _mm_store_ps(iBufferPtr, iValue);
-    _mm_store_ps(qBufferPtr, qValue);
-
-    iBufferPtr += 4;
-    qBufferPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  complexVectorPtr = (int8_t*)&complexVector[number];
-  for(; number < num_points; number++){
-    *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
-    *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
-  }
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I & Q floating point 
vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param qBuffer The Q buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_s32f_deinterleave_32f_x2_generic(float* iBuffer, 
float* qBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int 
num_points){
-  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
-  float* iBufferPtr = iBuffer;
-  float* qBufferPtr = qBuffer;
-  unsigned int number;
-  const float invScalar = 1.0 / scalar;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = (float)(*complexVectorPtr++)*invScalar;
-    *qBufferPtr++ = (float)(*complexVectorPtr++)*invScalar;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H */
diff --git a/volk/kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h 
b/volk/kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h
deleted file mode 100644
index bbe2c26..0000000
--- a/volk/kernels/volk/volk_8ic_s32f_deinterleave_real_32f.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
-#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
-
-#include <volk/volk_common.h>
-#include <inttypes.h>
-#include <stdio.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I float vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* 
iBuffer, const lv_8sc_t* complexVector, const float scalar, unsigned int 
num_points){
-  float* iBufferPtr = iBuffer;
-
-  unsigned int number = 0;
-  const unsigned int eighthPoints = num_points / 8;
-  __m128 iFloatValue;
-
-  const float iScalar= 1.0 / scalar;
-  __m128 invScalar = _mm_set_ps1(iScalar);
-  __m128i complexVal, iIntVal;
-  int8_t* complexVectorPtr = (int8_t*)complexVector;
-
-  __m128i moveMask = _mm_set_epi8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 
0x80, 14, 12, 10, 8, 6, 4, 2, 0);
-
-  for(;number < eighthPoints; number++){
-    complexVal = _mm_load_si128((__m128i*)complexVectorPtr); complexVectorPtr 
+= 16;
-    complexVal = _mm_shuffle_epi8(complexVal, moveMask);
-
-    iIntVal = _mm_cvtepi8_epi32(complexVal);
-    iFloatValue = _mm_cvtepi32_ps(iIntVal);
-
-    iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
-
-    _mm_store_ps(iBufferPtr, iFloatValue);
-
-    iBufferPtr += 4;
-
-    complexVal = _mm_srli_si128(complexVal, 4);
-    iIntVal = _mm_cvtepi8_epi32(complexVal);
-    iFloatValue = _mm_cvtepi32_ps(iIntVal);
-
-    iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
-
-    _mm_store_ps(iBufferPtr, iFloatValue);
-
-    iBufferPtr += 4;
-  }
-
-  number = eighthPoints * 8;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
-    complexVectorPtr++;
-  }
-
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I float vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_s32f_deinterleave_real_32f_a_sse(float* iBuffer, 
const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
-  float* iBufferPtr = iBuffer;
-
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-  __m128 iValue;
-
-  const float iScalar= 1.0 / scalar;
-  __m128 invScalar = _mm_set_ps1(iScalar);
-  int8_t* complexVectorPtr = (int8_t*)complexVector;
-
-  __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
-
-  for(;number < quarterPoints; number++){
-    floatBuffer[0] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-    floatBuffer[1] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-    floatBuffer[2] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-    floatBuffer[3] = (float)(*complexVectorPtr); complexVectorPtr += 2;
-
-    iValue = _mm_load_ps(floatBuffer);
-
-    iValue = _mm_mul_ps(iValue, invScalar);
-
-    _mm_store_ps(iBufferPtr, iValue);
-
-    iBufferPtr += 4;
-  }
-
-  number = quarterPoints * 4;
-  for(; number < num_points; number++){
-    *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
-    complexVectorPtr++;
-  }
-
-}
-#endif /* LV_HAVE_SSE */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Deinterleaves the complex 8 bit vector into I float vector data
-  \param complexVector The complex input vector
-  \param iBuffer The I buffer output data
-  \param scalar The scaling value being multiplied against each data point
-  \param num_points The number of complex data values to be deinterleaved
-*/
-static inline void volk_8ic_s32f_deinterleave_real_32f_generic(float* iBuffer, 
const lv_8sc_t* complexVector, const float scalar, unsigned int num_points){
-  unsigned int number = 0;
-  const int8_t* complexVectorPtr = (const int8_t*)complexVector;
-  float* iBufferPtr = iBuffer;
-  const float invScalar = 1.0 / scalar;
-  for(number = 0; number < num_points; number++){
-    *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
-    complexVectorPtr++;
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
diff --git a/volk/kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h 
b/volk/kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h
deleted file mode 100644
index 5b64c52..0000000
--- a/volk/kernels/volk/volk_8ic_x2_multiply_conjugate_16ic.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
-#define INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Multiplys the one complex vector with the complex conjugate of the 
second complex vector and stores their results in the third vector
-  \param cVector The complex vector where the results will be stored
-  \param aVector One of the complex vectors to be multiplied
-  \param bVector The complex vector which will be converted to complex 
conjugate and multiplied
-  \param num_points The number of complex values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_8ic_x2_multiply_conjugate_16ic_a_sse4_1(lv_16sc_t* 
cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int 
num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  __m128i x, y, realz, imagz;
-  lv_16sc_t* c = cVector;
-  const lv_8sc_t* a = aVector;
-  const lv_8sc_t* b = bVector;
-  __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
-
-  for(;number < quarterPoints; number++){
-    // Convert into 8 bit values into 16 bit values
-    x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
-    y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
-
-    // Calculate the ar*cr - ai*(-ci) portions
-    realz = _mm_madd_epi16(x,y);
-
-    // Calculate the complex conjugate of the cr + ci j values
-    y = _mm_sign_epi16(y, conjugateSign);
-
-    // Shift the order of the cr and ci values
-    y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), 
_MM_SHUFFLE(2,3,0,1));
-
-    // Calculate the ar*(-ci) + cr*(ai)
-    imagz = _mm_madd_epi16(x,y);
-
-    _mm_store_si128((__m128i*)c, _mm_packs_epi32(_mm_unpacklo_epi32(realz, 
imagz), _mm_unpackhi_epi32(realz, imagz)));
-
-    a += 4;
-    b += 4;
-    c += 4;
-  }
-
-  number = quarterPoints * 4;
-  int16_t* c16Ptr = (int16_t*)&cVector[number];
-  int8_t* a8Ptr = (int8_t*)&aVector[number];
-  int8_t* b8Ptr = (int8_t*)&bVector[number];
-  for(; number < num_points; number++){
-    float aReal =  (float)*a8Ptr++;
-    float aImag =  (float)*a8Ptr++;
-    lv_32fc_t aVal = lv_cmake(aReal, aImag );
-    float bReal = (float)*b8Ptr++;
-    float bImag = (float)*b8Ptr++;
-    lv_32fc_t bVal = lv_cmake( bReal, -bImag );
-    lv_32fc_t temp = aVal * bVal;
-
-    *c16Ptr++ = (int16_t)lv_creal(temp);
-    *c16Ptr++ = (int16_t)lv_cimag(temp);
-  }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Multiplys the one complex vector with the complex conjugate of the 
second complex vector and stores their results in the third vector
-  \param cVector The complex vector where the results will be stored
-  \param aVector One of the complex vectors to be multiplied
-  \param bVector The complex vector which will be converted to complex 
conjugate and multiplied
-  \param num_points The number of complex values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_8ic_x2_multiply_conjugate_16ic_generic(lv_16sc_t* 
cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, unsigned int 
num_points){
-  unsigned int number = 0;
-  int16_t* c16Ptr = (int16_t*)cVector;
-  int8_t* a8Ptr = (int8_t*)aVector;
-  int8_t* b8Ptr = (int8_t*)bVector;
-  for(number =0; number < num_points; number++){
-    float aReal =  (float)*a8Ptr++;
-    float aImag =  (float)*a8Ptr++;
-    lv_32fc_t aVal = lv_cmake(aReal, aImag );
-    float bReal = (float)*b8Ptr++;
-    float bImag = (float)*b8Ptr++;
-    lv_32fc_t bVal = lv_cmake( bReal, -bImag );
-    lv_32fc_t temp = aVal * bVal;
-
-    *c16Ptr++ = (int16_t)lv_creal(temp);
-    *c16Ptr++ = (int16_t)lv_cimag(temp);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_8ic_x2_multiply_conjugate_16ic_a_H */
diff --git a/volk/kernels/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc.h 
b/volk/kernels/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc.h
deleted file mode 100644
index d6cc1f0..0000000
--- a/volk/kernels/volk/volk_8ic_x2_s32f_multiply_conjugate_32fc.h
+++ /dev/null
@@ -1,144 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
-#define INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H
-
-#include <inttypes.h>
-#include <stdio.h>
-#include <volk/volk_complex.h>
-
-#ifdef LV_HAVE_SSE4_1
-#include <smmintrin.h>
-/*!
-  \brief Multiplys the one complex vector with the complex conjugate of the 
second complex vector and stores their results in the third vector
-  \param cVector The complex vector where the results will be stored
-  \param aVector One of the complex vectors to be multiplied
-  \param bVector The complex vector which will be converted to complex 
conjugate and multiplied
-  \param num_points The number of complex values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void 
volk_8ic_x2_s32f_multiply_conjugate_32fc_a_sse4_1(lv_32fc_t* cVector, const 
lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, unsigned int 
num_points){
-  unsigned int number = 0;
-  const unsigned int quarterPoints = num_points / 4;
-
-  __m128i x, y, realz, imagz;
-  __m128 ret;
-  lv_32fc_t* c = cVector;
-  const lv_8sc_t* a = aVector;
-  const lv_8sc_t* b = bVector;
-  __m128i conjugateSign = _mm_set_epi16(-1, 1, -1, 1, -1, 1, -1, 1);
-
-  __m128 invScalar = _mm_set_ps1(1.0/scalar);
-
-  for(;number < quarterPoints; number++){
-    // Convert into 8 bit values into 16 bit values
-    x = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)a));
-    y = _mm_cvtepi8_epi16(_mm_loadl_epi64((__m128i*)b));
-
-    // Calculate the ar*cr - ai*(-ci) portions
-    realz = _mm_madd_epi16(x,y);
-
-    // Calculate the complex conjugate of the cr + ci j values
-    y = _mm_sign_epi16(y, conjugateSign);
-
-    // Shift the order of the cr and ci values
-    y = _mm_shufflehi_epi16(_mm_shufflelo_epi16(y, _MM_SHUFFLE(2,3,0,1) ), 
_MM_SHUFFLE(2,3,0,1));
-
-    // Calculate the ar*(-ci) + cr*(ai)
-    imagz = _mm_madd_epi16(x,y);
-
-    // Interleave real and imaginary and then convert to float values
-    ret = _mm_cvtepi32_ps(_mm_unpacklo_epi32(realz, imagz));
-
-    // Normalize the floating point values
-    ret = _mm_mul_ps(ret, invScalar);
-
-    // Store the floating point values
-    _mm_store_ps((float*)c, ret);
-    c += 2;
-
-    // Interleave real and imaginary and then convert to float values
-    ret = _mm_cvtepi32_ps(_mm_unpackhi_epi32(realz, imagz));
-
-    // Normalize the floating point values
-    ret = _mm_mul_ps(ret, invScalar);
-
-    // Store the floating point values
-    _mm_store_ps((float*)c, ret);
-    c += 2;
-
-    a += 4;
-    b += 4;
-  }
-
-  number = quarterPoints * 4;
-  float* cFloatPtr = (float*)&cVector[number];
-  int8_t* a8Ptr = (int8_t*)&aVector[number];
-  int8_t* b8Ptr = (int8_t*)&bVector[number];
-  for(; number < num_points; number++){
-    float aReal =  (float)*a8Ptr++;
-    float aImag =  (float)*a8Ptr++;
-    lv_32fc_t aVal = lv_cmake(aReal, aImag );
-    float bReal = (float)*b8Ptr++;
-    float bImag = (float)*b8Ptr++;
-    lv_32fc_t bVal = lv_cmake( bReal, -bImag );
-    lv_32fc_t temp = aVal * bVal;
-
-    *cFloatPtr++ = lv_creal(temp) / scalar;
-    *cFloatPtr++ = lv_cimag(temp) / scalar;
-  }
-}
-#endif /* LV_HAVE_SSE4_1 */
-
-#ifdef LV_HAVE_GENERIC
-/*!
-  \brief Multiplys the one complex vector with the complex conjugate of the 
second complex vector and stores their results in the third vector
-  \param cVector The complex vector where the results will be stored
-  \param aVector One of the complex vectors to be multiplied
-  \param bVector The complex vector which will be converted to complex 
conjugate and multiplied
-  \param num_points The number of complex values in aVector and bVector to be 
multiplied together and stored into cVector
-*/
-static inline void volk_8ic_x2_s32f_multiply_conjugate_32fc_generic(lv_32fc_t* 
cVector, const lv_8sc_t* aVector, const lv_8sc_t* bVector, const float scalar, 
unsigned int num_points){
-  unsigned int number = 0;
-  float* cPtr = (float*)cVector;
-  const float invScalar = 1.0 / scalar;
-  int8_t* a8Ptr = (int8_t*)aVector;
-  int8_t* b8Ptr = (int8_t*)bVector;
-  for(number = 0; number < num_points; number++){
-    float aReal =  (float)*a8Ptr++;
-    float aImag =  (float)*a8Ptr++;
-    lv_32fc_t aVal = lv_cmake(aReal, aImag );
-    float bReal = (float)*b8Ptr++;
-    float bImag = (float)*b8Ptr++;
-    lv_32fc_t bVal = lv_cmake( bReal, -bImag );
-    lv_32fc_t temp = aVal * bVal;
-
-    *cPtr++ = (lv_creal(temp) * invScalar);
-    *cPtr++ = (lv_cimag(temp) * invScalar);
-  }
-}
-#endif /* LV_HAVE_GENERIC */
-
-
-
-
-#endif /* INCLUDED_volk_8ic_x2_s32f_multiply_conjugate_32fc_a_H */
diff --git a/volk/kernels/volk/volk_8u_conv_k7_r2puppet_8u.h 
b/volk/kernels/volk/volk_8u_conv_k7_r2puppet_8u.h
deleted file mode 100644
index b398556..0000000
--- a/volk/kernels/volk/volk_8u_conv_k7_r2puppet_8u.h
+++ /dev/null
@@ -1,264 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
-#define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
-
-#include <volk/volk.h>
-#include <volk/volk_8u_x4_conv_k7_r2_8u.h>
-#include <string.h>
-
-typedef union {
-  //decision_t is a BIT vector
-  unsigned char* t;
-  unsigned int* w;
-} p_decision_t;
-
-static inline int parity(int x, unsigned char* Partab)
-{
-  x ^= (x >> 16);
-  x ^= (x >> 8);
-  return Partab[x];
-}
-
-static inline int chainback_viterbi(unsigned char* data,
-                                    unsigned int nbits,
-                                    unsigned int endstate,
-                                    unsigned int tailsize,
-                                    unsigned char* decisions)
-{
-  unsigned char* d;
-  int d_ADDSHIFT = 0;
-  int d_numstates = (1 << 6);
-  int d_decision_t_size = d_numstates/8;
-  unsigned int d_k = 7;
-  int d_framebits = nbits;
-  /* ADDSHIFT and SUBSHIFT make sure that the thing returned is a byte. */
-  d = decisions;
-  /* Make room beyond the end of the encoder register so we can
-   * accumulate a full byte of decoded data
-   */
-
-  endstate = (endstate%d_numstates) << d_ADDSHIFT;
-
-  /* The store into data[] only needs to be done every 8 bits.
-   * But this avoids a conditional branch, and the writes will
-   * combine in the cache anyway
-   */
-
-  d += tailsize * d_decision_t_size ; /* Look past tail */
-  int retval;
-  int dif = tailsize - (d_k - 1);
-  //printf("break, %d, %d\n", dif, (nbits+dif)%d_framebits);
-  p_decision_t dec;
-  while(nbits-- > d_framebits - (d_k - 1)) {
-    int k;
-    dec.t =  &d[nbits * d_decision_t_size];
-    k = (dec.w[(endstate>>d_ADDSHIFT)/32] >> ((endstate>>d_ADDSHIFT)%32)) & 1;
-
-    endstate = (endstate >> 1) | (k << (d_k-2+d_ADDSHIFT));
-    //data[((nbits+dif)%nbits)>>3] = endstate>>d_SUBSHIFT;
-    //printf("%d, %d\n", k, (nbits+dif)%d_framebits);
-    data[((nbits+dif)%d_framebits)] = k;
-
-    retval = endstate;
-  }
-  nbits += 1;
-
-  while(nbits-- != 0) {
-    int k;
-
-    dec.t = &d[nbits * d_decision_t_size];
-
-    k = (dec.w[(endstate>>d_ADDSHIFT)/32] >> ((endstate>>d_ADDSHIFT)%32)) & 1;
-
-    endstate = (endstate >> 1) | (k << (d_k-2+d_ADDSHIFT));
-    data[((nbits+dif)%d_framebits)] = k;
-  }
-  //printf("%d, %d, %d, %d, %d, %d, %d, %d\n", 
data[4095],data[4094],data[4093],data[4092],data[4091],data[4090],data[4089],data[4088]);
-
-
-  return retval >> d_ADDSHIFT;
-}
-
-
-#if LV_HAVE_SSE3
-
-#include <pmmintrin.h>
-#include <emmintrin.h>
-#include <xmmintrin.h>
-#include <mmintrin.h>
-#include <stdio.h>
-
-
-
-
-
-static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms, 
unsigned char* dec, unsigned int framebits) {
-
-
-  static int once = 1;
-  int d_numstates = (1 << 6);
-  int rate = 2;
-  static unsigned char* D;
-  static unsigned char* Y;
-  static unsigned char* X;
-  static unsigned int excess = 6;
-  static unsigned char* Branchtab;
-  static unsigned char Partab[256];
-
-  int d_polys[2] = {79, 109};
-
-
-  if(once) {
-
-    X = (unsigned char*)volk_malloc(2*d_numstates, volk_get_alignment());
-    Y = X + d_numstates;
-    Branchtab = (unsigned char*)volk_malloc(d_numstates/2*rate, 
volk_get_alignment());
-    D = (unsigned char*)volk_malloc((d_numstates/8) * (framebits + 6), 
volk_get_alignment());
-    int state, i;
-    int cnt,ti;
-
-    /* Initialize parity lookup table */
-    for(i=0;i<256;i++){
-      cnt = 0;
-      ti = i;
-      while(ti){
-        if(ti & 1)
-          cnt++;
-        ti >>= 1;
-      }
-      Partab[i] = cnt & 1;
-    }
-    /*  Initialize the branch table */
-    for(state=0;state < d_numstates/2;state++){
-      for(i=0; i<rate; i++){
-        Branchtab[i*d_numstates/2+state] = (d_polys[i] < 0) ^ parity((2*state) 
& abs(d_polys[i]), Partab) ? 255 : 0;
-      }
-    }
-
-    once = 0;
-  }
-
-    //unbias the old_metrics
-  memset(X, 31, d_numstates);
-
-  volk_8u_x4_conv_k7_r2_8u_spiral(Y, X, syms, D, framebits/2 - excess, excess, 
Branchtab);
-
-  unsigned int min = X[0];
-  int i = 0, state = 0;
-  for(i = 0; i < (d_numstates); ++i) {
-    if(X[i] < min) {
-      min = X[i];
-      state = i;
-    }
-  }
-
-  chainback_viterbi(dec, framebits/2 -excess, state, excess, D);
-
-  return;
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-
-
-
-
-#if LV_HAVE_GENERIC
-
-
-static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms, 
unsigned char* dec, unsigned int framebits) {
-
-
-
-  static int once = 1;
-  int d_numstates = (1 << 6);
-  int rate = 2;
-  static unsigned char* Y;
-  static unsigned char* X;
-  static unsigned char* D;
-  static unsigned int excess = 6;
-  static unsigned char* Branchtab;
-  static unsigned char Partab[256];
-
-  int d_polys[2] = {79, 109};
-
-
-  if(once) {
-
-    X = (unsigned char*)volk_malloc(2*d_numstates, volk_get_alignment());
-    Y = X + d_numstates;
-    Branchtab = (unsigned char*)volk_malloc(d_numstates/2*rate, 
volk_get_alignment());
-    D = (unsigned char*)volk_malloc((d_numstates/8) * (framebits + 6), 
volk_get_alignment());
-
-    int state, i;
-    int cnt,ti;
-
-    /* Initialize parity lookup table */
-    for(i=0;i<256;i++){
-      cnt = 0;
-      ti = i;
-      while(ti){
-        if(ti & 1)
-          cnt++;
-        ti >>= 1;
-      }
-      Partab[i] = cnt & 1;
-    }
-    /*  Initialize the branch table */
-    for(state=0;state < d_numstates/2;state++){
-      for(i=0; i<rate; i++){
-        Branchtab[i*d_numstates/2+state] = (d_polys[i] < 0) ^ parity((2*state) 
& abs(d_polys[i]), Partab) ? 255 : 0;
-      }
-    }
-
-    once = 0;
-  }
-
-
-
-
-    //unbias the old_metrics
-  memset(X, 31, d_numstates);
-
-  volk_8u_x4_conv_k7_r2_8u_generic(Y, X, syms, D, framebits/2 - excess, 
excess, Branchtab);
-
-  unsigned int min = X[0];
-  int i = 0, state = 0;
-  for(i = 0; i < (d_numstates); ++i) {
-    if(X[i] < min) {
-      min = X[i];
-      state = i;
-    }
-  }
-
-  chainback_viterbi(dec, framebits/2 -excess, state, excess, D);
-
-  return;
-
-
-}
-
-#endif /* LV_HAVE_GENERIC */
-
-#endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/
diff --git a/volk/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h 
b/volk/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h
deleted file mode 100644
index e21dfe1..0000000
--- a/volk/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h
+++ /dev/null
@@ -1,422 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
-#define INCLUDED_volk_8u_x4_conv_k7_r2_8u_H
-
-
-
-typedef union {
-  unsigned char/*DECISIONTYPE*/ t[64/*NUMSTATES*//8/*DECISIONTYPE_BITSIZE*/];
-  unsigned int w[64/*NUMSTATES*//32];
-  unsigned short s[64/*NUMSTATES*//16];
-  unsigned char c[64/*NUMSTATES*//8];
-#ifdef _MSC_VER
-} decision_t;
-#else
-} decision_t __attribute__ ((aligned (16)));
-#endif
-
-static inline void renormalize(unsigned char* X, unsigned char threshold){
-  int NUMSTATES = 64;
-  int i;
-
-    unsigned char min=X[0];
-    //if(min > threshold) {
-    for(i=0;i<NUMSTATES;i++)
-    if (min>X[i])
-      min=X[i];
-    for(i=0;i<NUMSTATES;i++)
-      X[i]-=min;
-    //}
-}
-
-
-
-//helper BFLY for GENERIC version
-static inline void BFLY(int i, int s, unsigned char * syms, unsigned char *Y, 
unsigned char *X, decision_t * d, unsigned char* Branchtab) {
-  int j, decision0, decision1;
-  unsigned char metric,m0,m1,m2,m3;
-
-  int NUMSTATES = 64;
-  int RATE = 2;
-  int METRICSHIFT = 1;
-  int PRECISIONSHIFT = 2;
-
-
-
-  metric =0;
-  for(j=0;j<RATE;j++)
-    metric += (Branchtab[i+j*NUMSTATES/2] ^ syms[s*RATE+j])>>METRICSHIFT ;
-  metric=metric>>PRECISIONSHIFT;
-
-  unsigned char max = ((RATE*((256 -1)>>METRICSHIFT))>>PRECISIONSHIFT);
-
-  m0 = X[i] + metric;
-  m1 = X[i+NUMSTATES/2] + (max - metric);
-  m2 = X[i] + (max - metric);
-  m3 = X[i+NUMSTATES/2] + metric;
-
-  decision0 = (signed int)(m0-m1) > 0;
-  decision1 = (signed int)(m2-m3) > 0;
-
-  Y[2*i] = decision0 ? m1 : m0;
-  Y[2*i+1] =  decision1 ? m3 : m2;
-
-  d->w[i/(sizeof(unsigned int)*8/2)+s*(sizeof(decision_t)/sizeof(unsigned 
int))] |=
-    (decision0|decision1<<1) << ((2*i)&(sizeof(unsigned int)*8-1));
-}
-
-
-#if LV_HAVE_SSE3
-
-#include <pmmintrin.h>
-#include <emmintrin.h>
-#include <xmmintrin.h>
-#include <mmintrin.h>
-#include <stdio.h>
-
-static inline void volk_8u_x4_conv_k7_r2_8u_spiral(unsigned char* Y, unsigned 
char* X, unsigned char* syms, unsigned char* dec, unsigned int framebits, 
unsigned int excess, unsigned char* Branchtab) {
-  unsigned int i9;
-  for(i9 = 0; i9 < ((framebits + excess) >> 1); i9++) {
-    unsigned char a75, a81;
-    int a73, a92;
-    short int s20, s21, s26, s27;
-    unsigned char  *a74, *a80, *b6;
-    short int  *a110, *a111, *a91, *a93, *a94;
-    __m128i  *a102, *a112, *a113, *a71, *a72, *a77, *a83
-      , *a95, *a96, *a97, *a98, *a99;
-    __m128i a105, a106, a86, a87;
-    __m128i a100, a101, a103, a104, a107, a108, a109
-      , a76, a78, a79, a82, a84, a85, a88, a89
-      , a90, d10, d11, d12, d9, m23, m24, m25
-      , m26, m27, m28, m29, m30, s18, s19, s22
-      , s23, s24, s25, s28, s29, t13, t14, t15
-      , t16, t17, t18;
-    a71 = ((__m128i  *) X);
-    s18 = *(a71);
-    a72 = (a71 + 2);
-    s19 = *(a72);
-    a73 = (4 * i9);
-    a74 = (syms + a73);
-    a75 = *(a74);
-    a76 = _mm_set1_epi8(a75);
-    a77 = ((__m128i  *) Branchtab);
-    a78 = *(a77);
-    a79 = _mm_xor_si128(a76, a78);
-    b6 = (a73 + syms);
-    a80 = (b6 + 1);
-    a81 = *(a80);
-    a82 = _mm_set1_epi8(a81);
-    a83 = (a77 + 2);
-    a84 = *(a83);
-    a85 = _mm_xor_si128(a82, a84);
-    t13 = _mm_avg_epu8(a79,a85);
-    a86 = ((__m128i ) t13);
-    a87 = _mm_srli_epi16(a86, 2);
-    a88 = ((__m128i ) a87);
-    t14 = _mm_and_si128(a88, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
-                                         , 63, 63, 63, 63, 63, 63, 63, 63
-                                         , 63));
-    t15 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
-                                    , 63, 63, 63, 63, 63, 63, 63, 63
-                                    , 63), t14);
-    m23 = _mm_adds_epu8(s18, t14);
-    m24 = _mm_adds_epu8(s19, t15);
-    m25 = _mm_adds_epu8(s18, t15);
-    m26 = _mm_adds_epu8(s19, t14);
-    a89 = _mm_min_epu8(m24, m23);
-    d9 = _mm_cmpeq_epi8(a89, m24);
-    a90 = _mm_min_epu8(m26, m25);
-    d10 = _mm_cmpeq_epi8(a90, m26);
-    s20 = _mm_movemask_epi8(_mm_unpacklo_epi8(d9,d10));
-    a91 = ((short int  *) dec);
-    a92 = (8 * i9);
-    a93 = (a91 + a92);
-    *(a93) = s20;
-    s21 = _mm_movemask_epi8(_mm_unpackhi_epi8(d9,d10));
-    a94 = (a93 + 1);
-    *(a94) = s21;
-    s22 = _mm_unpacklo_epi8(a89, a90);
-    s23 = _mm_unpackhi_epi8(a89, a90);
-    a95 = ((__m128i  *) Y);
-    *(a95) = s22;
-    a96 = (a95 + 1);
-    *(a96) = s23;
-    a97 = (a71 + 1);
-    s24 = *(a97);
-    a98 = (a71 + 3);
-    s25 = *(a98);
-    a99 = (a77 + 1);
-    a100 = *(a99);
-    a101 = _mm_xor_si128(a76, a100);
-    a102 = (a77 + 3);
-    a103 = *(a102);
-    a104 = _mm_xor_si128(a82, a103);
-    t16 = _mm_avg_epu8(a101,a104);
-    a105 = ((__m128i ) t16);
-    a106 = _mm_srli_epi16(a105, 2);
-    a107 = ((__m128i ) a106);
-    t17 = _mm_and_si128(a107, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
-                                          , 63, 63, 63, 63, 63, 63, 63, 63
-                                          , 63));
-    t18 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
-                                    , 63, 63, 63, 63, 63, 63, 63, 63
-                                    , 63), t17);
-    m27 = _mm_adds_epu8(s24, t17);
-    m28 = _mm_adds_epu8(s25, t18);
-    m29 = _mm_adds_epu8(s24, t18);
-    m30 = _mm_adds_epu8(s25, t17);
-    a108 = _mm_min_epu8(m28, m27);
-    d11 = _mm_cmpeq_epi8(a108, m28);
-    a109 = _mm_min_epu8(m30, m29);
-    d12 = _mm_cmpeq_epi8(a109, m30);
-    s26 = _mm_movemask_epi8(_mm_unpacklo_epi8(d11,d12));
-    a110 = (a93 + 2);
-    *(a110) = s26;
-    s27 = _mm_movemask_epi8(_mm_unpackhi_epi8(d11,d12));
-    a111 = (a93 + 3);
-    *(a111) = s27;
-    s28 = _mm_unpacklo_epi8(a108, a109);
-    s29 = _mm_unpackhi_epi8(a108, a109);
-    a112 = (a95 + 2);
-    *(a112) = s28;
-    a113 = (a95 + 3);
-    *(a113) = s29;
-    if ((((unsigned char  *) Y)[0]>210)) {
-      __m128i m5, m6;
-      m5 = ((__m128i  *) Y)[0];
-      m5 = _mm_min_epu8(m5, ((__m128i  *) Y)[1]);
-      m5 = _mm_min_epu8(m5, ((__m128i  *) Y)[2]);
-      m5 = _mm_min_epu8(m5, ((__m128i  *) Y)[3]);
-      __m128i m7;
-      m7 = _mm_min_epu8(_mm_srli_si128(m5, 8), m5);
-      m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 32)), 
((__m128i ) m7)));
-      m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 16)), 
((__m128i ) m7)));
-      m7 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m7, 8)), 
((__m128i ) m7)));
-      m7 = _mm_unpacklo_epi8(m7, m7);
-      m7 = _mm_shufflelo_epi16(m7, _MM_SHUFFLE(0, 0, 0, 0));
-      m6 = _mm_unpacklo_epi64(m7, m7);
-      ((__m128i  *) Y)[0] = _mm_subs_epu8(((__m128i  *) Y)[0], m6);
-      ((__m128i  *) Y)[1] = _mm_subs_epu8(((__m128i  *) Y)[1], m6);
-      ((__m128i  *) Y)[2] = _mm_subs_epu8(((__m128i  *) Y)[2], m6);
-      ((__m128i  *) Y)[3] = _mm_subs_epu8(((__m128i  *) Y)[3], m6);
-    }
-    unsigned char a188, a194;
-    int a186, a205;
-    short int s48, s49, s54, s55;
-    unsigned char  *a187, *a193, *b15;
-    short int  *a204, *a206, *a207, *a223, *a224, *b16;
-    __m128i  *a184, *a185, *a190, *a196, *a208, *a209, *a210
-      , *a211, *a212, *a215, *a225, *a226;
-    __m128i a199, a200, a218, a219;
-    __m128i a189, a191, a192, a195, a197, a198, a201
-      , a202, a203, a213, a214, a216, a217, a220, a221
-      , a222, d17, d18, d19, d20, m39, m40, m41
-      , m42, m43, m44, m45, m46, s46, s47, s50
-      , s51, s52, s53, s56, s57, t25, t26, t27
-      , t28, t29, t30;
-    a184 = ((__m128i  *) Y);
-    s46 = *(a184);
-    a185 = (a184 + 2);
-    s47 = *(a185);
-    a186 = (4 * i9);
-    b15 = (a186 + syms);
-    a187 = (b15 + 2);
-    a188 = *(a187);
-    a189 = _mm_set1_epi8(a188);
-    a190 = ((__m128i  *) Branchtab);
-    a191 = *(a190);
-    a192 = _mm_xor_si128(a189, a191);
-    a193 = (b15 + 3);
-    a194 = *(a193);
-    a195 = _mm_set1_epi8(a194);
-    a196 = (a190 + 2);
-    a197 = *(a196);
-    a198 = _mm_xor_si128(a195, a197);
-    t25 = _mm_avg_epu8(a192,a198);
-    a199 = ((__m128i ) t25);
-    a200 = _mm_srli_epi16(a199, 2);
-    a201 = ((__m128i ) a200);
-    t26 = _mm_and_si128(a201, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
-                                          , 63, 63, 63, 63, 63, 63, 63, 63
-                                          , 63));
-    t27 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
-                                    , 63, 63, 63, 63, 63, 63, 63, 63
-                                    , 63), t26);
-    m39 = _mm_adds_epu8(s46, t26);
-    m40 = _mm_adds_epu8(s47, t27);
-    m41 = _mm_adds_epu8(s46, t27);
-    m42 = _mm_adds_epu8(s47, t26);
-    a202 = _mm_min_epu8(m40, m39);
-    d17 = _mm_cmpeq_epi8(a202, m40);
-    a203 = _mm_min_epu8(m42, m41);
-    d18 = _mm_cmpeq_epi8(a203, m42);
-    s48 = _mm_movemask_epi8(_mm_unpacklo_epi8(d17,d18));
-    a204 = ((short int  *) dec);
-    a205 = (8 * i9);
-    b16 = (a204 + a205);
-    a206 = (b16 + 4);
-    *(a206) = s48;
-    s49 = _mm_movemask_epi8(_mm_unpackhi_epi8(d17,d18));
-    a207 = (b16 + 5);
-    *(a207) = s49;
-    s50 = _mm_unpacklo_epi8(a202, a203);
-    s51 = _mm_unpackhi_epi8(a202, a203);
-    a208 = ((__m128i  *) X);
-    *(a208) = s50;
-    a209 = (a208 + 1);
-    *(a209) = s51;
-    a210 = (a184 + 1);
-    s52 = *(a210);
-    a211 = (a184 + 3);
-    s53 = *(a211);
-    a212 = (a190 + 1);
-    a213 = *(a212);
-    a214 = _mm_xor_si128(a189, a213);
-    a215 = (a190 + 3);
-    a216 = *(a215);
-    a217 = _mm_xor_si128(a195, a216);
-    t28 = _mm_avg_epu8(a214,a217);
-    a218 = ((__m128i ) t28);
-    a219 = _mm_srli_epi16(a218, 2);
-    a220 = ((__m128i ) a219);
-    t29 = _mm_and_si128(a220, _mm_set_epi8(63, 63, 63, 63, 63, 63, 63
-                                          , 63, 63, 63, 63, 63, 63, 63, 63
-                                          , 63));
-    t30 = _mm_subs_epu8(_mm_set_epi8(63, 63, 63, 63, 63, 63, 63
-                                    , 63, 63, 63, 63, 63, 63, 63, 63
-                                    , 63), t29);
-    m43 = _mm_adds_epu8(s52, t29);
-    m44 = _mm_adds_epu8(s53, t30);
-    m45 = _mm_adds_epu8(s52, t30);
-    m46 = _mm_adds_epu8(s53, t29);
-    a221 = _mm_min_epu8(m44, m43);
-    d19 = _mm_cmpeq_epi8(a221, m44);
-    a222 = _mm_min_epu8(m46, m45);
-    d20 = _mm_cmpeq_epi8(a222, m46);
-    s54 = _mm_movemask_epi8(_mm_unpacklo_epi8(d19,d20));
-    a223 = (b16 + 6);
-    *(a223) = s54;
-    s55 = _mm_movemask_epi8(_mm_unpackhi_epi8(d19,d20));
-    a224 = (b16 + 7);
-    *(a224) = s55;
-    s56 = _mm_unpacklo_epi8(a221, a222);
-    s57 = _mm_unpackhi_epi8(a221, a222);
-    a225 = (a208 + 2);
-    *(a225) = s56;
-    a226 = (a208 + 3);
-    *(a226) = s57;
-    if ((((unsigned char  *) X)[0]>210)) {
-      __m128i m12, m13;
-      m12 = ((__m128i  *) X)[0];
-      m12 = _mm_min_epu8(m12, ((__m128i  *) X)[1]);
-      m12 = _mm_min_epu8(m12, ((__m128i  *) X)[2]);
-      m12 = _mm_min_epu8(m12, ((__m128i  *) X)[3]);
-      __m128i m14;
-      m14 = _mm_min_epu8(_mm_srli_si128(m12, 8), m12);
-      m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 32)), 
((__m128i ) m14)));
-      m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 16)), 
((__m128i ) m14)));
-      m14 = ((__m128i ) _mm_min_epu8(((__m128i ) _mm_srli_epi64(m14, 8)), 
((__m128i ) m14)));
-      m14 = _mm_unpacklo_epi8(m14, m14);
-      m14 = _mm_shufflelo_epi16(m14, _MM_SHUFFLE(0, 0, 0, 0));
-      m13 = _mm_unpacklo_epi64(m14, m14);
-      ((__m128i  *) X)[0] = _mm_subs_epu8(((__m128i  *) X)[0], m13);
-      ((__m128i  *) X)[1] = _mm_subs_epu8(((__m128i  *) X)[1], m13);
-      ((__m128i  *) X)[2] = _mm_subs_epu8(((__m128i  *) X)[2], m13);
-      ((__m128i  *) X)[3] = _mm_subs_epu8(((__m128i  *) X)[3], m13);
-    }
-  }
-
-  renormalize(X, 210);
-
-  /*int ch;
-  for(ch = 0; ch < 64; ch++) {
-    printf("%d,", X[ch]);
-  }
-  printf("\n");*/
-
-  unsigned int j;
-  for(j=0; j < (framebits + excess) % 2; ++j) {
-    int i;
-    for(i=0;i<64/2;i++){
-      BFLY(i, (((framebits+excess) >> 1) << 1) + j , syms, Y, X, (decision_t 
*)dec, Branchtab);
-    }
-
-
-    renormalize(Y, 210);
-
-    /*printf("\n");
-    for(ch = 0; ch < 64; ch++) {
-      printf("%d,", Y[ch]);
-    }
-    printf("\n");*/
-
-  }
-  /*skip*/
-  return;
-}
-
-#endif /*LV_HAVE_SSE3*/
-
-
-
-
-
-
-
-#if LV_HAVE_GENERIC
-
-
-static inline void volk_8u_x4_conv_k7_r2_8u_generic(unsigned char* Y, unsigned 
char* X, unsigned char* syms, unsigned char* dec, unsigned int framebits, 
unsigned int excess, unsigned char* Branchtab) {
-  int nbits = framebits + excess;
-  int NUMSTATES = 64;
-  int RENORMALIZE_THRESHOLD = 210;
-
-
-  int s,i;
-
-
-
-  for (s=0;s<nbits;s++){
-    void *tmp;
-    for(i=0;i<NUMSTATES/2;i++){
-      BFLY(i, s, syms, Y, X, (decision_t *)dec, Branchtab);
-    }
-
-
-
-    renormalize(Y, RENORMALIZE_THRESHOLD);
-
-    ///     Swap pointers to old and new metrics
-    tmp = (void *)X;
-    X = Y;
-    Y = (unsigned char*)tmp;
-  }
-
-
-  return;
-}
-
-#endif /* LV_HAVE_GENERIC */
-
-#endif /*INCLUDED_volk_8u_x4_conv_k7_r2_8u_H*/
diff --git a/volk/lib/CMakeLists.txt b/volk/lib/CMakeLists.txt
deleted file mode 100644
index 37915e5..0000000
--- a/volk/lib/CMakeLists.txt
+++ /dev/null
@@ -1,566 +0,0 @@
-#
-# Copyright 2011-2012,2014 Free Software Foundation, Inc.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-#
-
-########################################################################
-# header file detection
-########################################################################
-include(CheckIncludeFile)
-CHECK_INCLUDE_FILE(cpuid.h HAVE_CPUID_H)
-if(HAVE_CPUID_H)
-    add_definitions(-DHAVE_CPUID_H)
-endif()
-
-CHECK_INCLUDE_FILE(intrin.h HAVE_INTRIN_H)
-if(HAVE_INTRIN_H)
-    add_definitions(-DHAVE_INTRIN_H)
-endif()
-
-CHECK_INCLUDE_FILE(fenv.h HAVE_FENV_H)
-if(HAVE_FENV_H)
-    add_definitions(-DHAVE_FENV_H)
-endif()
-
-CHECK_INCLUDE_FILE(dlfcn.h HAVE_DLFCN_H)
-if(HAVE_DLFCN_H)
-    add_definitions(-DHAVE_DLFCN_H)
-    list(APPEND volk_libraries ${CMAKE_DL_LIBS})
-endif()
-
-########################################################################
-# Setup the compiler name
-########################################################################
-set(COMPILER_NAME ${CMAKE_C_COMPILER_ID})
-if(MSVC) #its not set otherwise
-    set(COMPILER_NAME MSVC)
-endif()
-
-message(STATUS "Compiler name: ${COMPILER_NAME}")
-
-if(NOT DEFINED COMPILER_NAME)
-    message(FATAL_ERROR "COMPILER_NAME undefined. Volk build may not support 
this compiler.")
-endif()
-
-########################################################################
-# Special clang flag so flag checks can fail
-########################################################################
-if(COMPILER_NAME MATCHES "GNU")
-    include(CheckCXXCompilerFlag)
-    CHECK_CXX_COMPILER_FLAG("-Werror=unused-command-line-argument" 
HAVE_WERROR_UNUSED_CMD_LINE_ARG)
-    if(HAVE_WERROR_UNUSED_CMD_LINE_ARG)
-        set(VOLK_FLAG_CHECK_FLAGS "-Werror=unused-command-line-argument")
-    endif()
-endif()
-
-########################################################################
-# check for posix_memalign, since some OSs do not internally define
-# _XOPEN_SOURCE or _POSIX_C_SOURCE; they leave this to the user.
-########################################################################
-
-include(CheckFunctionExists)
-CHECK_FUNCTION_EXISTS(posix_memalign HAVE_POSIX_MEMALIGN)
-
-if(HAVE_POSIX_MEMALIGN)
-      add_definitions(-DHAVE_POSIX_MEMALIGN)
-endif(HAVE_POSIX_MEMALIGN)
-
-########################################################################
-# detect x86 flavor of CPU
-########################################################################
-if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(i.86|x86|x86_64|amd64)$")
-    message(STATUS "x86* CPU detected")
-    set(CPU_IS_x86 TRUE)
-endif()
-
-########################################################################
-# determine passing architectures based on compile flag tests
-########################################################################
-execute_process(
-    COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
-    ${CMAKE_SOURCE_DIR}/gen/volk_compile_utils.py
-    --mode "arch_flags" --compiler "${COMPILER_NAME}"
-    OUTPUT_VARIABLE arch_flag_lines OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-macro(check_arch arch_name)
-    set(flags ${ARGN})
-    set(have_${arch_name} TRUE)
-    foreach(flag ${flags})
-        include(CheckCXXCompilerFlag)
-        set(have_flag have${flag})
-        execute_process( #make the have_flag have nice alphanum chars (just 
for looks/not necessary)
-            COMMAND ${PYTHON_EXECUTABLE} -c "import re; print(re.sub('\\W', 
'_', '${have_flag}'))"
-            OUTPUT_VARIABLE have_flag OUTPUT_STRIP_TRAILING_WHITESPACE
-        )
-        if(VOLK_FLAG_CHECK_FLAGS)
-            set(CMAKE_REQUIRED_FLAGS ${VOLK_FLAG_CHECK_FLAGS})
-        endif()
-        CHECK_CXX_COMPILER_FLAG(${flag} ${have_flag})
-        unset(CMAKE_REQUIRED_FLAGS)
-        if (NOT ${have_flag})
-            set(have_${arch_name} FALSE)
-        endif()
-    endforeach()
-    if (have_${arch_name})
-        list(APPEND available_archs ${arch_name})
-    endif()
-endmacro(check_arch)
-
-foreach(line ${arch_flag_lines})
-    string(REGEX REPLACE "," ";" arch_flags ${line})
-    check_arch(${arch_flags})
-endforeach(line)
-
-macro(OVERRULE_ARCH arch reason)
-    message(STATUS "${reason}, Overruled arch ${arch}")
-    list(REMOVE_ITEM available_archs ${arch})
-endmacro(OVERRULE_ARCH)
-
-########################################################################
-# eliminate AVX on if not on x86, or if the compiler does not accept
-# the xgetbv instruction, or {if not cross-compiling and the xgetbv
-# executable does not function correctly}.
-########################################################################
-set(HAVE_XGETBV 0)
-set(HAVE_AVX_CVTPI32_PS 0)
-if(CPU_IS_x86)
-    # check to see if the compiler/linker works with xgetb instruction
-    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c "unsigned long long 
_xgetbv(unsigned int index) { unsigned int eax, edx; __asm__ 
__volatile__(\"xgetbv\" : \"=a\"(eax), \"=d\"(edx) : \"c\"(index)); return 
((unsigned long long)edx << 32) | eax; } int main (void) { (void) _xgetbv(0); 
return (0); }")
-    execute_process(COMMAND ${CMAKE_C_COMPILER} -o
-        ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
-        ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c
-        OUTPUT_QUIET ERROR_QUIET
-        RESULT_VARIABLE avx_compile_result)
-    if(NOT ${avx_compile_result} EQUAL 0)
-        OVERRULE_ARCH(avx "Compiler or linker missing xgetbv instruction")
-    elseif(NOT CROSSCOMPILE_MULTILIB)
-        execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
-            OUTPUT_QUIET ERROR_QUIET
-            RESULT_VARIABLE avx_exe_result)
-        if(NOT ${avx_exe_result} EQUAL 0)
-            OVERRULE_ARCH(avx "CPU missing xgetbv")
-        else()
-            set(HAVE_XGETBV 1)
-        endif()
-    else()
-        # cross compiling and compiler/linker seems to work; assume working
-        set(HAVE_XGETBV 1)
-    endif()
-    file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv
-        ${CMAKE_CURRENT_BINARY_DIR}/test_xgetbv.c)
-
-    #########################################################################
-    # eliminate AVX if cvtpi32_ps intrinsic fails like some versions of clang
-    #########################################################################
-
-    # check to see if the compiler/linker works with cvtpi32_ps instrinsic 
when using AVX
-    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c "#include 
<immintrin.h>\nint main (void) {__m128 __a; __m64 __b; __m128 foo = 
_mm_cvtpi32_ps(__a, __b); return (0); }")
-    execute_process(COMMAND ${CMAKE_C_COMPILER} -mavx -o
-      ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
-      ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c
-      OUTPUT_QUIET ERROR_QUIET
-      RESULT_VARIABLE avx_compile_result)
-    if(NOT ${avx_compile_result} EQUAL 0)
-      OVERRULE_ARCH(avx "Compiler missing cvtpi32_ps instrinsic")
-    elseif(NOT CROSSCOMPILE_MULTILIB)
-      execute_process(COMMAND ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
-        OUTPUT_QUIET ERROR_QUIET
-        RESULT_VARIABLE avx_exe_result)
-      if(NOT ${avx_exe_result} EQUAL 0)
-        OVERRULE_ARCH(avx "CPU missing cvtpi32_ps")
-      else()
-        set(HAVE_AVX_CVTPI32_PS 1)
-      endif()
-    else()
-      set(HAVE_AVX_CVTPI32_PS 1)
-    endif()
-    file(REMOVE ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps
-      ${CMAKE_CURRENT_BINARY_DIR}/test_cvtpi32_ps.c)
-
-    # Disable SSE4a if Clang is less than version 3.2
-    if("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
-      # Figure out the version of Clang
-      if(CMAKE_VERSION VERSION_LESS "2.8.10")
-        # Exctract the Clang version from the --version string.
-        # In cmake 2.8.10, we can just use CMAKE_C_COMPILER_VERSION
-        # without having to go through these string manipulations
-        execute_process(COMMAND ${CMAKE_C_COMPILER} --version
-          OUTPUT_VARIABLE clang_version)
-        string(REGEX MATCH "[0-9].[0-9]" CMAKE_C_COMPILER_VERSION 
${clang_version})
-      endif(CMAKE_VERSION VERSION_LESS "2.8.10")
-
-      if(CMAKE_C_COMPILER_VERSION VERSION_LESS "3.2")
-        OVERRULE_ARCH(sse4_a "Clang >= 3.2 required for SSE4a")
-      endif(CMAKE_C_COMPILER_VERSION VERSION_LESS "3.2")
-    endif("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
-
-endif(CPU_IS_x86)
-
-if(${HAVE_XGETBV})
-    add_definitions(-DHAVE_XGETBV)
-endif()
-
-if(${HAVE_AVX_CVTPI32_PS})
-    add_definitions(-DHAVE_AVX_CVTPI32_PS)
-endif()
-
-########################################################################
-# if the CPU is not x86, eliminate all Intel SIMD
-########################################################################
-
-if(NOT CPU_IS_x86)
-    OVERRULE_ARCH(3dnow "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(mmx "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(sse "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(sse2 "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(sse3 "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(ssse3 "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(sse4_a "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(sse4_1 "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(sse4_2 "Architecture is not x86 or x86_64")
-    OVERRULE_ARCH(avx "Architecture is not x86 or x86_64")
-endif(NOT CPU_IS_x86)
-
-########################################################################
-# implement overruling in the ORC case,
-# since ORC always passes flag detection
-########################################################################
-if(NOT ORC_FOUND)
-    OVERRULE_ARCH(orc "ORC support not found")
-endif()
-
-########################################################################
-# implement overruling in the non-multilib case
-# this makes things work when both -m32 and -m64 pass
-########################################################################
-if(NOT CROSSCOMPILE_MULTILIB AND CPU_IS_x86)
-    include(CheckTypeSize)
-    check_type_size("void*[8]" SIZEOF_CPU BUILTIN_TYPES_ONLY)
-    if (${SIZEOF_CPU} EQUAL 64)
-        OVERRULE_ARCH(32 "CPU width is 64 bits")
-    endif()
-    if (${SIZEOF_CPU} EQUAL 32)
-        OVERRULE_ARCH(64 "CPU width is 32 bits")
-    endif()
-
-    #MSVC 64 bit does not have MMX, overrule it
-    if (${SIZEOF_CPU} EQUAL 64 AND MSVC)
-        OVERRULE_ARCH(mmx "No MMX for Win64")
-    endif()
-
-endif()
-
-########################################################################
-# done overrules! print the result
-########################################################################
-message(STATUS "Available architectures: ${available_archs}")
-
-########################################################################
-# determine available machines given the available architectures
-########################################################################
-execute_process(
-    COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
-    ${CMAKE_SOURCE_DIR}/gen/volk_compile_utils.py
-    --mode "machines" --archs "${available_archs}"
-    OUTPUT_VARIABLE available_machines OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-
-########################################################################
-# Implement machine overruling for redundant machines:
-# A machine is redundant when expansion rules occur,
-# and the arch superset passes configuration checks.
-# When this occurs, eliminate the redundant machines
-# to avoid unnecessary compilation of subset machines.
-########################################################################
-foreach(arch mmx orc 64 32)
-    foreach(machine_name ${available_machines})
-        string(REPLACE "_${arch}" "" machine_name_no_arch ${machine_name})
-        if (${machine_name} STREQUAL ${machine_name_no_arch})
-        else()
-            list(REMOVE_ITEM available_machines ${machine_name_no_arch})
-        endif()
-    endforeach(machine_name)
-endforeach(arch)
-
-########################################################################
-# done overrules! print the result
-########################################################################
-message(STATUS "Available machines: ${available_machines}")
-
-########################################################################
-# Create rules to run the volk generator
-########################################################################
-
-#dependencies are all python, xml, and header implementation files
-file(GLOB xml_files ${CMAKE_SOURCE_DIR}/gen/*.xml)
-file(GLOB py_files ${CMAKE_SOURCE_DIR}/gen/*.py)
-file(GLOB h_files ${CMAKE_SOURCE_DIR}/kernels/volk/*.h)
-
-macro(gen_template tmpl output)
-    list(APPEND volk_gen_sources ${output})
-    add_custom_command(
-        OUTPUT ${output}
-        DEPENDS ${xml_files} ${py_files} ${h_files} ${tmpl}
-        COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
-        ${CMAKE_SOURCE_DIR}/gen/volk_tmpl_utils.py
-        --input ${tmpl} --output ${output} ${ARGN}
-    )
-endmacro(gen_template)
-
-make_directory(${CMAKE_BINARY_DIR}/include/volk)
-
-gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk.tmpl.h              
${CMAKE_BINARY_DIR}/include/volk/volk.h)
-gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk.tmpl.c              
${CMAKE_BINARY_DIR}/lib/volk.c)
-gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_typedefs.tmpl.h     
${CMAKE_BINARY_DIR}/include/volk/volk_typedefs.h)
-gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_cpu.tmpl.h          
${CMAKE_BINARY_DIR}/include/volk/volk_cpu.h)
-gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_cpu.tmpl.c          
${CMAKE_BINARY_DIR}/lib/volk_cpu.c)
-gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_config_fixed.tmpl.h 
${CMAKE_BINARY_DIR}/include/volk/volk_config_fixed.h)
-gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_machines.tmpl.h     
${CMAKE_BINARY_DIR}/lib/volk_machines.h)
-gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_machines.tmpl.c     
${CMAKE_BINARY_DIR}/lib/volk_machines.c)
-
-set(BASE_CFLAGS NONE)
-STRING(TOUPPER ${CMAKE_BUILD_TYPE} CBTU)
-MESSAGE(STATUS BUILT TYPE ${CBTU})
-MESSAGE(STATUS "Base cflags = ${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS}")
-set(COMPILER_INFO "")
-IF(MSVC)
-    IF(MSVC90)   #Visual Studio 9
-        SET(cmake_c_compiler_version "Microsoft Visual Studio 9.0")
-    ELSE(MSVC10) #Visual Studio 10
-        SET(cmake_c_compiler_version "Microsoft Visual Studio 10.0")
-    ELSE(MSVC11) #Visual Studio 11
-        SET(cmake_c_compiler_version "Microsoft Visual Studio 11.0")
-    ELSE(MSVC12) #Visual Studio 12
-        SET(cmake_c_compiler_version "Microsoft Visual Studio 12.0")
-    ENDIF()
-ELSE()
-    execute_process(COMMAND ${CMAKE_C_COMPILER} --version
-            OUTPUT_VARIABLE cmake_c_compiler_version)
-ENDIF(MSVC)
-set(COMPILER_INFO "${CMAKE_C_COMPILER}:::${CMAKE_C_FLAGS_${GRCBTU}} 
${CMAKE_C_FLAGS}\n${CMAKE_CXX_COMPILER}:::${CMAKE_CXX_FLAGS_${GRCBTU}} 
${CMAKE_CXX_FLAGS}\n" )
-
-foreach(machine_name ${available_machines})
-    #generate machine source
-    set(machine_source 
${CMAKE_CURRENT_BINARY_DIR}/volk_machine_${machine_name}.c)
-    gen_template(${CMAKE_SOURCE_DIR}/tmpl/volk_machine_xxx.tmpl.c 
${machine_source} ${machine_name})
-
-    #determine machine flags
-    execute_process(
-        COMMAND ${PYTHON_EXECUTABLE} ${PYTHON_DASH_B}
-        ${CMAKE_SOURCE_DIR}/gen/volk_compile_utils.py
-        --mode "machine_flags" --machine "${machine_name}" --compiler 
"${COMPILER_NAME}"
-        OUTPUT_VARIABLE ${machine_name}_flags OUTPUT_STRIP_TRAILING_WHITESPACE
-    )
-    MESSAGE(STATUS "BUILD INFO ::: ${machine_name} ::: ${COMPILER_NAME} ::: 
${CMAKE_C_FLAGS_${CBTU}} ${CMAKE_C_FLAGS} ${${machine_name}_flags}")
-    set(COMPILER_INFO 
"${COMPILER_INFO}${machine_name}:::${COMPILER_NAME}:::${CMAKE_C_FLAGS_${CBTU}} 
${CMAKE_C_FLAGS} ${${machine_name}_flags}\n" )
-    if(${machine_name}_flags)
-        set_source_files_properties(${machine_source} PROPERTIES COMPILE_FLAGS 
"${${machine_name}_flags}")
-    endif()
-
-    #add to available machine defs
-    string(TOUPPER LV_MACHINE_${machine_name} machine_def)
-    list(APPEND machine_defs ${machine_def})
-endforeach(machine_name)
-
-# Convert to a C string to compile and display properly
-string(STRIP "${cmake_c_compiler_version}" cmake_c_compiler_version)
-string(STRIP ${COMPILER_INFO} COMPILER_INFO)
-MESSAGE(STATUS "Compiler Version: ${cmake_c_compiler_version}")
-string(REPLACE "\n" " \\n" cmake_c_compiler_version 
${cmake_c_compiler_version})
-string(REPLACE "\n" " \\n" COMPILER_INFO ${COMPILER_INFO})
-
-########################################################################
-# Set local include directories first
-########################################################################
-include_directories(
-    ${CMAKE_BINARY_DIR}/include
-    ${CMAKE_SOURCE_DIR}/include
-    ${CMAKE_SOURCE_DIR}/kernels
-    ${CMAKE_CURRENT_BINARY_DIR}
-    ${CMAKE_CURRENT_SOURCE_DIR}
-)
-
-########################################################################
-# Handle ASM support
-#  on by default, but let users turn it off
-########################################################################
-if(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
-  set(ASM_ARCHS_AVAILABLE "armv7")
-
-  set(FULL_C_FLAGS "${CMAKE_C_FLAGS}" "${CMAKE_CXX_COMPILER_ARG1}")
-
-  # sort through a list of all architectures we have ASM for
-  # if we find one that matches our current system architecture
-  # set up the assembler flags and include the source files
-  foreach(ARCH ${ASM_ARCHS_AVAILABLE})
-    string(REGEX MATCH "${ARCH}" ASM_ARCH "${FULL_C_FLAGS}")
-    if( ASM_ARCH STREQUAL "armv7" )
-      message(STATUS "---- Adding ASM files") # we always use ATT syntax
-      message(STATUS "-- Detected armv7 architecture; enabling ASM")
-      # setup architecture specific assembler flags
-      set(ARCH_ASM_FLAGS "-mfpu=neon -g")
-      # then add the files
-      include_directories(${CMAKE_SOURCE_DIR}/kernels/volk/asm/neon)
-      file(GLOB asm_files ${CMAKE_SOURCE_DIR}/kernels/volk/asm/neon/*.s)
-      foreach(asm_file ${asm_files})
-        list(APPEND volk_sources ${asm_file})
-        message(STATUS "Adding source file: ${asm_file}")
-      endforeach(asm_file)
-    endif()
-    enable_language(ASM)
-    set(CMAKE_ASM_FLAGS ${ARCH_ASM_FLAGS})
-    message(STATUS "c flags: ${FULL_C_FLAGS}")
-    message(STATUS "asm flags: ${CMAKE_ASM_FLAGS}")
-  endforeach(ARCH)
-
-else(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
-  message(STATUS "Not enabling ASM support. CMake >= 2.8.10 required.")
-  foreach(machine_name ${available_machines})
-    string(REGEX MATCH "neon" NEON_MACHINE ${machine_name})
-    if( NEON_MACHINE STREQUAL "neon")
-      message(FATAL_ERROR "CMake >= 2.8.10 is required for ARM NEON support")
-    endif()
-  endforeach()
-endif(${CMAKE_VERSION} VERSION_GREATER "2.8.9")
-
-########################################################################
-# Handle orc support
-########################################################################
-if(ORC_FOUND)
-    #setup orc library usage
-    include_directories(${ORC_INCLUDE_DIRS})
-    link_directories(${ORC_LIBRARY_DIRS})
-    list(APPEND volk_libraries ${ORC_LIBRARIES})
-
-    #setup orc functions
-    file(GLOB orc_files ${CMAKE_SOURCE_DIR}/orc/*.orc)
-    foreach(orc_file ${orc_files})
-
-        #extract the name for the generated c source from the orc file
-        get_filename_component(orc_file_name_we ${orc_file} NAME_WE)
-        set(orcc_gen ${CMAKE_CURRENT_BINARY_DIR}/${orc_file_name_we}.c)
-
-        #create a rule to generate the source and add to the list of sources
-        add_custom_command(
-            COMMAND ${ORCC_EXECUTABLE} --include math.h --implementation -o 
${orcc_gen} ${orc_file}
-            DEPENDS ${orc_file} OUTPUT ${orcc_gen}
-        )
-        list(APPEND volk_sources ${orcc_gen})
-
-    endforeach(orc_file)
-else()
-    message(STATUS "Did not find liborc and orcc, disabling orc support...")
-endif()
-
-
-########################################################################
-# Handle the generated constants
-########################################################################
-
-execute_process(COMMAND ${PYTHON_EXECUTABLE} -c
-    "import time;print time.strftime('%a, %d %b %Y %H:%M:%S', time.gmtime())"
-    OUTPUT_VARIABLE BUILD_DATE OUTPUT_STRIP_TRAILING_WHITESPACE
-)
-message(STATUS "Loading build date ${BUILD_DATE} into constants...")
-message(STATUS "Loading version ${VERSION} into constants...")
-
-#double escape for windows backslash path separators
-string(REPLACE "\\" "\\\\" prefix ${prefix})
-
-configure_file(
-    ${CMAKE_CURRENT_SOURCE_DIR}/constants.c.in
-    ${CMAKE_CURRENT_BINARY_DIR}/constants.c
address@hidden)
-
-list(APPEND volk_sources ${CMAKE_CURRENT_BINARY_DIR}/constants.c)
-
-########################################################################
-# Setup the volk sources list and library
-########################################################################
-if(NOT WIN32)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
-endif()
-
-list(APPEND volk_sources
-    ${CMAKE_CURRENT_SOURCE_DIR}/volk_prefs.c
-    ${CMAKE_CURRENT_SOURCE_DIR}/volk_rank_archs.c
-    ${CMAKE_CURRENT_SOURCE_DIR}/volk_malloc.c
-    ${volk_gen_sources}
-)
-
-#set the machine definitions where applicable
-set_source_files_properties(
-    ${CMAKE_CURRENT_BINARY_DIR}/volk.c
-    ${CMAKE_CURRENT_BINARY_DIR}/volk_machines.c
-PROPERTIES COMPILE_DEFINITIONS "${machine_defs}")
-
-if(MSVC)
-    #add compatibility includes for stdint types
-    include_directories(${CMAKE_SOURCE_DIR}/cmake/msvc)
-    add_definitions(-DHAVE_CONFIG_H)
-    #compile the sources as C++ due to the lack of complex.h under MSVC
-    set_source_files_properties(${volk_sources} PROPERTIES LANGUAGE CXX)
-endif()
-
-#create the volk runtime library
-add_library(volk SHARED ${volk_sources})
-target_link_libraries(volk ${volk_libraries})
-set_target_properties(volk PROPERTIES SOVERSION ${LIBVER})
-set_target_properties(volk PROPERTIES DEFINE_SYMBOL "volk_EXPORTS")
-
-install(TARGETS volk
-    LIBRARY DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_runtime" # .so file
-    ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_devel"   # .lib file
-    RUNTIME DESTINATION bin              COMPONENT "volk_runtime" # .dll file
-)
-
-if(ENABLE_STATIC_LIBS)
-  add_library(volk_static STATIC ${volk_sources})
-
-  if(NOT WIN32)
-    set_target_properties(volk_static
-      PROPERTIES OUTPUT_NAME volk)
-  endif(NOT WIN32)
-
-  install(TARGETS volk_static
-    ARCHIVE DESTINATION lib${LIB_SUFFIX} COMPONENT "volk_devel"   # .lib file
-    )
-endif(ENABLE_STATIC_LIBS)
-
-########################################################################
-# Build the QA test application
-########################################################################
-
-
-if(Boost_FOUND)
-
-    set_source_files_properties(
-        ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc PROPERTIES
-        COMPILE_DEFINITIONS "BOOST_TEST_DYN_LINK;BOOST_TEST_MAIN"
-    )
-
-    include_directories(${Boost_INCLUDE_DIRS})
-    link_directories(${Boost_LIBRARY_DIRS})
-
-    add_executable(test_all
-        ${CMAKE_CURRENT_SOURCE_DIR}/testqa.cc
-        ${CMAKE_CURRENT_SOURCE_DIR}/qa_utils.cc
-    )
-    target_link_libraries(test_all volk ${Boost_LIBRARIES})
-    add_test(qa_volk_test_all test_all)
-
-endif(Boost_FOUND)
diff --git a/volk/lib/constants.c.in b/volk/lib/constants.c.in
deleted file mode 100644
index 68e9b24..0000000
--- a/volk/lib/constants.c.in
+++ /dev/null
@@ -1,63 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2013 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#if HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <volk/constants.h>
-
-char*
-volk_prefix()
-{
-  return "@prefix@";
-}
-
-char*
-volk_build_date()
-{
-  return "@BUILD_DATE@";
-}
-
-char*
-volk_version()
-{
-  return "@VERSION@";
-}
-
-char*
-volk_c_compiler()
-{
-  return "@cmake_c_compiler_version@";
-}
-
-char*
-volk_compiler_flags()
-{
-  return "@COMPILER_INFO@";
-}
-
-char*
-volk_available_machines()
-{
-  return "@available_machines@";
-}
diff --git a/volk/lib/gcc_x86_cpuid.h b/volk/lib/gcc_x86_cpuid.h
deleted file mode 100644
index e0254f1..0000000
--- a/volk/lib/gcc_x86_cpuid.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
- *
- * This file is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option) any
- * later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
- * <http://www.gnu.org/licenses/>.
- */
-
-/* %ecx */
-#define bit_SSE3       (1 << 0)
-#define bit_PCLMUL     (1 << 1)
-#define bit_SSSE3      (1 << 9)
-#define bit_FMA                (1 << 12)
-#define bit_CMPXCHG16B (1 << 13)
-#define bit_SSE4_1     (1 << 19)
-#define bit_SSE4_2     (1 << 20)
-#define bit_MOVBE      (1 << 22)
-#define bit_POPCNT     (1 << 23)
-#define bit_AES                (1 << 25)
-#define bit_XSAVE      (1 << 26)
-#define bit_OSXSAVE    (1 << 27)
-#define bit_AVX                (1 << 28)
-#define bit_F16C       (1 << 29)
-#define bit_RDRND      (1 << 30)
-
-/* %edx */
-#define bit_CMPXCHG8B  (1 << 8)
-#define bit_CMOV       (1 << 15)
-#define bit_MMX                (1 << 23)
-#define bit_FXSAVE     (1 << 24)
-#define bit_SSE                (1 << 25)
-#define bit_SSE2       (1 << 26)
-
-/* Extended Features */
-/* %ecx */
-#define bit_LAHF_LM    (1 << 0)
-#define bit_ABM                (1 << 5)
-#define bit_SSE4a      (1 << 6)
-#define bit_XOP         (1 << 11)
-#define bit_LWP        (1 << 15)
-#define bit_FMA4        (1 << 16)
-#define bit_TBM         (1 << 21)
-
-/* %edx */
-#define bit_MMXEXT     (1 << 22)
-#define bit_LM         (1 << 29)
-#define bit_3DNOWP     (1 << 30)
-#define bit_3DNOW      (1 << 31)
-
-/* Extended Features (%eax == 7) */
-#define bit_FSGSBASE   (1 << 0)
-#define bit_BMI                (1 << 3)
-
-#if defined(__i386__) && defined(__PIC__)
-/* %ebx may be the PIC register.  */
-#if __GNUC__ >= 3
-#define __cpuid(level, a, b, c, d)                     \
-  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"                  \
-          "cpuid\n\t"                                  \
-          "xchg{l}\t{%%}ebx, %1\n\t"                   \
-          : "=a" (a), "=r" (b), "=c" (c), "=d" (d)     \
-          : "0" (level))
-
-#define __cpuid_count(level, count, a, b, c, d)                \
-  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"                  \
-          "cpuid\n\t"                                  \
-          "xchg{l}\t{%%}ebx, %1\n\t"                   \
-          : "=a" (a), "=r" (b), "=c" (c), "=d" (d)     \
-          : "0" (level), "2" (count))
-#else
-/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
-   nor alternatives in i386 code.  */
-#define __cpuid(level, a, b, c, d)                     \
-  __asm__ ("xchgl\t%%ebx, %1\n\t"                      \
-          "cpuid\n\t"                                  \
-          "xchgl\t%%ebx, %1\n\t"                       \
-          : "=a" (a), "=r" (b), "=c" (c), "=d" (d)     \
-          : "0" (level))
-
-#define __cpuid_count(level, count, a, b, c, d)                \
-  __asm__ ("xchgl\t%%ebx, %1\n\t"                      \
-          "cpuid\n\t"                                  \
-          "xchgl\t%%ebx, %1\n\t"                       \
-          : "=a" (a), "=r" (b), "=c" (c), "=d" (d)     \
-          : "0" (level), "2" (count))
-#endif
-#else
-#define __cpuid(level, a, b, c, d)                     \
-  __asm__ ("cpuid\n\t"                                 \
-          : "=a" (a), "=b" (b), "=c" (c), "=d" (d)     \
-          : "0" (level))
-
-#define __cpuid_count(level, count, a, b, c, d)                \
-  __asm__ ("cpuid\n\t"                                 \
-          : "=a" (a), "=b" (b), "=c" (c), "=d" (d)     \
-          : "0" (level), "2" (count))
-#endif
-
-/* Return highest supported input value for cpuid instruction.  ext can
-   be either 0x0 or 0x8000000 to return highest supported value for
-   basic or extended cpuid information.  Function returns 0 if cpuid
-   is not supported or whatever cpuid returns in eax register.  If sig
-   pointer is non-null, then first four bytes of the signature
-   (as found in ebx register) are returned in location pointed by sig.  */
-
-static __inline unsigned int
-__get_cpuid_max (unsigned int __ext, unsigned int *__sig)
-{
-  unsigned int __eax, __ebx, __ecx, __edx;
-
-#ifndef __x86_64__
-  /* See if we can use cpuid.  On AMD64 we always can.  */
-#if __GNUC__ >= 3
-  __asm__ ("pushf{l|d}\n\t"
-          "pushf{l|d}\n\t"
-          "pop{l}\t%0\n\t"
-          "mov{l}\t{%0, %1|%1, %0}\n\t"
-          "xor{l}\t{%2, %0|%0, %2}\n\t"
-          "push{l}\t%0\n\t"
-          "popf{l|d}\n\t"
-          "pushf{l|d}\n\t"
-          "pop{l}\t%0\n\t"
-          "popf{l|d}\n\t"
-          : "=&r" (__eax), "=&r" (__ebx)
-          : "i" (0x00200000));
-#else
-/* Host GCCs older than 3.0 weren't supporting Intel asm syntax
-   nor alternatives in i386 code.  */
-  __asm__ ("pushfl\n\t"
-          "pushfl\n\t"
-          "popl\t%0\n\t"
-          "movl\t%0, %1\n\t"
-          "xorl\t%2, %0\n\t"
-          "pushl\t%0\n\t"
-          "popfl\n\t"
-          "pushfl\n\t"
-          "popl\t%0\n\t"
-          "popfl\n\t"
-          : "=&r" (__eax), "=&r" (__ebx)
-          : "i" (0x00200000));
-#endif
-
-  if (!((__eax ^ __ebx) & 0x00200000))
-    return 0;
-#endif
-
-  /* Host supports cpuid.  Return highest supported cpuid input value.  */
-  __cpuid (__ext, __eax, __ebx, __ecx, __edx);
-
-  if (__sig)
-    *__sig = __ebx;
-
-  return __eax;
-}
-
-/* Return cpuid data for requested cpuid level, as found in returned
-   eax, ebx, ecx and edx registers.  The function checks if cpuid is
-   supported and returns 1 for valid cpuid information or 0 for
-   unsupported cpuid level.  All pointers are required to be non-null.  */
-
-static __inline int
-__get_cpuid (unsigned int __level,
-            unsigned int *__eax, unsigned int *__ebx,
-            unsigned int *__ecx, unsigned int *__edx)
-{
-  unsigned int __ext = __level & 0x80000000;
-
-  if (__get_cpuid_max (__ext, 0) < __level)
-    return 0;
-
-  __cpuid (__level, *__eax, *__ebx, *__ecx, *__edx);
-  return 1;
-}
diff --git a/volk/lib/qa_16s_add_quad_aligned16.cc 
b/volk/lib/qa_16s_add_quad_aligned16.cc
deleted file mode 100644
index 8da43b9..0000000
--- a/volk/lib/qa_16s_add_quad_aligned16.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_add_quad_aligned16.h>
-#include <volk/volk_16s_add_quad_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_add_quad_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-
-
-void qa_16s_add_quad_aligned16::t1() {
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3200;
-  const int ITERS = 100000;
-  __VOLK_ATTR_ALIGNED(16) short input0[vlen];
-  __VOLK_ATTR_ALIGNED(16) short input1[vlen];
-  __VOLK_ATTR_ALIGNED(16) short input2[vlen];
-  __VOLK_ATTR_ALIGNED(16) short input3[vlen];
-  __VOLK_ATTR_ALIGNED(16) short input4[vlen];
-
-  __VOLK_ATTR_ALIGNED(16) short output0[vlen];
-  __VOLK_ATTR_ALIGNED(16) short output1[vlen];
-  __VOLK_ATTR_ALIGNED(16) short output2[vlen];
-  __VOLK_ATTR_ALIGNED(16) short output3[vlen];
-  __VOLK_ATTR_ALIGNED(16) short output01[vlen];
-  __VOLK_ATTR_ALIGNED(16) short output11[vlen];
-  __VOLK_ATTR_ALIGNED(16) short output21[vlen];
-  __VOLK_ATTR_ALIGNED(16) short output31[vlen];
-
-  for(int i = 0; i < vlen; ++i) {
-    short plus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short minus0 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short plus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short minus1 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short plus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short minus2 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short plus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short minus3 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short plus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-    short minus4 = ((short) (rand() - (RAND_MAX/2))) >> 2;
-
-    input0[i] = plus0 - minus0;
-    input1[i] = plus1 - minus1;
-    input2[i] = plus2 - minus2;
-    input3[i] = plus3 - minus3;
-    input4[i] = plus4 - minus4;
-
-  }
-  printf("16s_add_quad_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_add_quad_aligned16_manual(output0, output1, output2, output3, 
input0, input1, input2, input3, input4, vlen << 1 , "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_16s_add_quad_aligned16_manual(output01, output11, output21, output31, 
input0, input1, input2, input3, input4, vlen << 1 , "sse2");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse2_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output0[i], output01[i]);
-    CPPUNIT_ASSERT_EQUAL(output1[i], output11[i]);
-    CPPUNIT_ASSERT_EQUAL(output2[i], output21[i]);
-    CPPUNIT_ASSERT_EQUAL(output3[i], output31[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_add_quad_aligned16.h 
b/volk/lib/qa_16s_add_quad_aligned16.h
deleted file mode 100644
index 3c1ae97..0000000
--- a/volk/lib/qa_16s_add_quad_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
-#define INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_add_quad_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_add_quad_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_ADD_QUAD_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc 
b/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
deleted file mode 100644
index 5a58569..0000000
--- a/volk/lib/qa_16s_branch_4_state_8_aligned16.cc
+++ /dev/null
@@ -1,106 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_branch_4_state_8_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for ssse3
-
-#ifndef LV_HAVE_SSSE3
-
-void qa_16s_branch_4_state_8_aligned16::t1() {
-  printf("ssse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_branch_4_state_8_aligned16::t1() {
-  const int num_iters = 1000000;
-  const int vlen = 32;
-
-  static char permute0[16]__attribute__((aligned(16))) = {0x0e, 0x0f, 0x0a, 
0x0b, 0x04, 0x05, 0x00, 0x01, 0x0c, 0x0d, 0x08, 0x09, 0x06, 0x07, 0x02, 0x03};
-  static char permute1[16]__attribute__((aligned(16))) = {0x0c, 0x0d, 0x08, 
0x09, 0x06, 0x07, 0x02, 0x03, 0x0e, 0x0f, 0x0a, 0x0b, 0x04, 0x05, 0x00, 0x01};
-  static char permute2[16]__attribute__((aligned(16))) = {0x02, 0x03, 0x06, 
0x07, 0x08, 0x09, 0x0c, 0x0d, 0x00, 0x01, 0x04, 0x05, 0x0a, 0x0b, 0x0e, 0x0f};
-  static char permute3[16]__attribute__((aligned(16))) = {0x00, 0x01, 0x04, 
0x05, 0x0a, 0x0b, 0x0e, 0x0f, 0x02, 0x03, 0x06, 0x07, 0x08, 0x09, 0x0c, 0x0d};
-  static char* permuters[4] = {permute0, permute1, permute2, permute3};
-
-  unsigned int num_bytes = vlen << 1;
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-
-  __VOLK_ATTR_ALIGNED(16) short target[vlen];
-  __VOLK_ATTR_ALIGNED(16) short target2[vlen];
-  __VOLK_ATTR_ALIGNED(16) short target3[vlen];
-
-  __VOLK_ATTR_ALIGNED(16) short src0[vlen];
-  __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen] =  {
-7, 5, 2, 0, 6, 4, 3, 1, 6, 4, 3, 1, 7, 5, 2, 0, 1, 3, 4, 6, 0, 2, 5, 7, 0, 2, 
5, 7, 1, 3, 4, 6 };
-  __VOLK_ATTR_ALIGNED(16) short cntl0[vlen] = {
-    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000 };
-  __VOLK_ATTR_ALIGNED(16) short cntl1[vlen] = {
-    0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 
0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000 };
-  __VOLK_ATTR_ALIGNED(16) short cntl2[vlen] = {
-    0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0xffff, 
0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 
0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 
0xffff, 0xffff, 0x0000 };
-  __VOLK_ATTR_ALIGNED(16) short cntl3[vlen] =  {
-    0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 
0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 
0x0000, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0x0000, 
0x0000, 0xffff, 0xffff };
-  __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
-
-
-
-  for(int i = 0; i < vlen; ++i) {
-    src0[i] = i;
-
-  }
-
-
-  printf("16s_branch_4_state_8_aligned\n");
-
-
-  start = clock();
-  for(int i = 0; i < num_iters; ++i) {
-    volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, 
permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
-  }
-  end = clock();
-
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-
-  printf("permute_and_scalar_add_time: %f\n", total);
-
-
-
-  start = clock();
-  for(int i = 0; i < num_iters; ++i) {
-    volk_16s_branch_4_state_8_aligned16_manual(target2, src0, permuters, 
cntl2, cntl3, scalars, "ssse3");
-  }
-  end = clock();
-
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-
-  printf("branch_4_state_8_time, ssse3: %f\n", total);
-
-  start = clock();
-  for(int i = 0; i < num_iters; ++i) {
-    volk_16s_branch_4_state_8_aligned16_manual(target3, src0, permuters, 
cntl2, cntl3, scalars, "generic");
-  }
-  end = clock();
-
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-
-  printf("permute_and_scalar_add_time, generic: %f\n", total);
-
-
-
-  for(int i = 0; i < vlen; ++i) {
-    printf("psa... %d, b4s8... %d\n", target[i], target3[i]);
-  }
-
-  for(int i = 0; i < vlen; ++i) {
-
-    CPPUNIT_ASSERT(target[i] == target2[i]);
-    CPPUNIT_ASSERT(target[i] == target3[i]);
-  }
-}
-
-
-#endif
diff --git a/volk/lib/qa_16s_branch_4_state_8_aligned16.h 
b/volk/lib/qa_16s_branch_4_state_8_aligned16.h
deleted file mode 100644
index 41ab073..0000000
--- a/volk/lib/qa_16s_branch_4_state_8_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
-#define INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_branch_4_state_8_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_branch_4_state_8_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_BRANCH_4_STATE_8_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc 
b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
deleted file mode 100644
index dadd2c5..0000000
--- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_permute_and_scalar_add_aligned16.h>
-#include <volk/volk_16s_permute_and_scalar_add_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_permute_and_scalar_add_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_permute_and_scalar_add_aligned16::t1() {
-  const int vlen = 64;
-
-  unsigned int num_bytes = vlen << 1;
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-
-  __VOLK_ATTR_ALIGNED(16) short target[vlen];
-  __VOLK_ATTR_ALIGNED(16) short target2[vlen];
-  __VOLK_ATTR_ALIGNED(16) short src0[vlen];
-  __VOLK_ATTR_ALIGNED(16) short permute_indexes[vlen];
-  __VOLK_ATTR_ALIGNED(16) short cntl0[vlen];
-  __VOLK_ATTR_ALIGNED(16) short cntl1[vlen];
-  __VOLK_ATTR_ALIGNED(16) short cntl2[vlen];
-  __VOLK_ATTR_ALIGNED(16) short cntl3[vlen];
-  __VOLK_ATTR_ALIGNED(16) short scalars[4] = {1, 2, 3, 4};
-
-  for(int i = 0; i < vlen; ++i) {
-    src0[i] = i;
-    permute_indexes[i] = (3 * i)%vlen;
-    cntl0[i] = 0xff;
-    cntl1[i] = 0xff * (i%2);
-    cntl2[i] = 0xff * ((i>>1)%2);
-    cntl3[i] = 0xff * ((i%4) == 3);
-  }
-
-  printf("16s_permute_and_scalar_add_aligned\n");
-
-  start = clock();
-  for(int i = 0; i < 100000; ++i) {
-    volk_16s_permute_and_scalar_add_aligned16_manual(target, src0, 
permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "generic");
-  }
-  end = clock();
-
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-
-  printf("generic_time: %f\n", total);
-
-  start = clock();
-  for(int i = 0; i < 100000; ++i) {
-    volk_16s_permute_and_scalar_add_aligned16_manual(target2, src0, 
permute_indexes, cntl0, cntl1, cntl2, cntl3, scalars, num_bytes, "sse2");
-  }
-  end = clock();
-
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-
-  printf("sse2_time: %f\n", total);
-
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("generic... %d, sse2... %d\n", target[i], target2[i]);
-  }
-
-  for(int i = 0; i < vlen; ++i) {
-
-    CPPUNIT_ASSERT(target[i] == target2[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h 
b/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h
deleted file mode 100644
index 3643aee..0000000
--- a/volk/lib/qa_16s_permute_and_scalar_add_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
-#define INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_permute_and_scalar_add_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_permute_and_scalar_add_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_PERMUTE_AND_SCALAR_ADD_ALIGNED16_H */
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.cc 
b/volk/lib/qa_16s_quad_max_star_aligned16.cc
deleted file mode 100644
index 2a5dec4..0000000
--- a/volk/lib/qa_16s_quad_max_star_aligned16.cc
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <volk/volk.h>
-#include <qa_16s_quad_max_star_aligned16.h>
-#include <volk/volk_16s_quad_max_star_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse2
-
-#ifndef LV_HAVE_SSE2
-
-void qa_16s_quad_max_star_aligned16::t1() {
-  printf("sse2 not available... no test performed\n");
-}
-
-#else
-
-void qa_16s_quad_max_star_aligned16::t1() {
-  const int vlen = 34;
-
-  __VOLK_ATTR_ALIGNED(16) short input0[vlen];
-  __VOLK_ATTR_ALIGNED(16) short input1[vlen];
-  __VOLK_ATTR_ALIGNED(16) short input2[vlen];
-  __VOLK_ATTR_ALIGNED(16) short input3[vlen];
-
-  __VOLK_ATTR_ALIGNED(16) short output0[vlen];
-  __VOLK_ATTR_ALIGNED(16) short output1[vlen];
-
-  for(int i = 0; i < vlen; ++i) {
-    short plus0 = (short) (rand() - (RAND_MAX/2));
-    short plus1 = (short) (rand() - (RAND_MAX/2));
-    short plus2 = (short) (rand() - (RAND_MAX/2));
-    short plus3 = (short) (rand() - (RAND_MAX/2));
-
-    short minus0 = (short) (rand() - (RAND_MAX/2));
-    short minus1 = (short) (rand() - (RAND_MAX/2));
-    short minus2 = (short) (rand() - (RAND_MAX/2));
-    short minus3 = (short) (rand() - (RAND_MAX/2));
-
-    input0[i] = plus0 - minus0;
-    input1[i] = plus1 - minus1;
-    input2[i] = plus2 - minus2;
-    input3[i] = plus3 - minus3;
-  }
-
-  volk_16s_quad_max_star_aligned16_manual(output0, input0, input1, input2, 
input3, 2*vlen, "generic");
-
-  volk_16s_quad_max_star_aligned16_manual(output1, input0, input1, input2, 
input3, 2*vlen, "sse2");
-
-  printf("16s_quad_max_star_aligned\n");
-  for(int i = 0; i < vlen; ++i) {
-    printf("generic... %d, sse2... %d, inputs: %d, %d, %d, %d\n", output0[i], 
output1[i], input0[i], input1[i], input2[i], input3[i]);
-  }
-
-  for(int i = 0; i < vlen; ++i) {
-
-    CPPUNIT_ASSERT_EQUAL(output0[i], output1[i]);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_16s_quad_max_star_aligned16.h 
b/volk/lib/qa_16s_quad_max_star_aligned16.h
deleted file mode 100644
index 51e7708..0000000
--- a/volk/lib/qa_16s_quad_max_star_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
-#define INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_16s_quad_max_star_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_16s_quad_max_star_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_16S_QUAD_MAX_STAR_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.cc 
b/volk/lib/qa_32f_fm_detect_aligned16.cc
deleted file mode 100644
index 4e792ec..0000000
--- a/volk/lib/qa_32f_fm_detect_aligned16.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32f_fm_detect_aligned16.h>
-#include <volk/volk_32f_fm_detect_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_fm_detect_aligned16::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-void qa_32f_fm_detect_aligned16::t1() {
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  __VOLK_ATTR_ALIGNED(16) float input0[vlen];
-
-  __VOLK_ATTR_ALIGNED(16) float output0[vlen];
-  __VOLK_ATTR_ALIGNED(16) float output01[vlen];
-
-  for(int i = 0; i < vlen; ++i) {
-    input0[i] = ((float) (rand() - (RAND_MAX/2))) / 
static_cast<float>((RAND_MAX/2));
-  }
-  printf("32f_fm_detect_aligned\n");
-
-  start = clock();
-  float save = 0.1;
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_fm_detect_aligned16_manual(output0, input0, 1.0, &save, vlen, 
"generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  save = 0.1;
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32f_fm_detect_aligned16_manual(output01, input0, 1.0, &save, vlen, 
"sse");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse_time: %f\n", total);
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output0[i], output01[i], fabs(output0[i]) * 
1e-4);
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32f_fm_detect_aligned16.h 
b/volk/lib/qa_32f_fm_detect_aligned16.h
deleted file mode 100644
index a2680c5..0000000
--- a/volk/lib/qa_32f_fm_detect_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
-#define INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_fm_detect_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_fm_detect_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_FM_DETECT_ALIGNED16_H */
diff --git a/volk/lib/qa_32f_index_max_aligned16.cc 
b/volk/lib/qa_32f_index_max_aligned16.cc
deleted file mode 100644
index 2df2067..0000000
--- a/volk/lib/qa_32f_index_max_aligned16.cc
+++ /dev/null
@@ -1,103 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32f_index_max_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#define ERR_DELTA (1e-4)
-#define NUM_ITERS 1000000
-#define VEC_LEN 3097
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);      // uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] = uniform () * 32767;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE
-
-void qa_32f_index_max_aligned16::t1(){
-  printf("sse not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32f_index_max_aligned16::t1(){
-
-  const int vlen = VEC_LEN;
-
-
-  volk_runtime_init();
-
-  volk_environment_init();
-  int ret;
-
-  unsigned int* target_sse4_1;
-  unsigned int* target_sse;
-  unsigned int* target_generic;
-  float* src0 ;
-
-
-  unsigned int i_target_sse4_1;
-  target_sse4_1 = &i_target_sse4_1;
-  unsigned int i_target_sse;
-  target_sse = &i_target_sse;
-  unsigned int i_target_generic;
-  target_generic = &i_target_generic;
-
-  ret = posix_memalign((void**)&src0, 16, vlen *sizeof(float));
-
-  random_floats((float*)src0, vlen);
-
-  printf("32f_index_max_aligned16\n");
-
-  clock_t start, end;
-  double total;
-
-
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32f_index_max_aligned16_manual(target_generic, src0, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32f_index_max_aligned16_manual(target_sse, src0, vlen, "sse2");
-  }
-
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse time: %f\n", total);
-
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    get_volk_runtime()->volk_32f_index_max_aligned16(target_sse4_1, src0, 
vlen);
-  }
-
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.1 time: %f\n", total);
-
-
-  printf("generic: %u, sse: %u, sse4.1: %u\n", target_generic[0], 
target_sse[0], target_sse4_1[0]);
-  CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse[0]);
-  CPPUNIT_ASSERT_EQUAL(target_generic[0], target_sse4_1[0]);
-
-  free(src0);
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32f_index_max_aligned16.h 
b/volk/lib/qa_32f_index_max_aligned16.h
deleted file mode 100644
index 8cadffa..0000000
--- a/volk/lib/qa_32f_index_max_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
-#define INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32f_index_max_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32f_index_max_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32F_INDEX_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_index_max_aligned16.cc 
b/volk/lib/qa_32fc_index_max_aligned16.cc
deleted file mode 100644
index 3859bcb..0000000
--- a/volk/lib/qa_32fc_index_max_aligned16.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_index_max_aligned16.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <time.h>
-
-#define ERR_DELTA (1e-4)
-#define NUM_ITERS 1000000
-#define VEC_LEN 3096
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);      // uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  unsigned int i = 0;
-  for (; i < n; i++) {
-
-    buf[i] = uniform () * 32767;
-
-  }
-}
-
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_index_max_aligned16::t1(){
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-
-void qa_32fc_index_max_aligned16::t1(){
-
-  const int vlen = VEC_LEN;
-
-  volk_environment_init();
-  int ret;
-
-  unsigned int* target;
-  unsigned int* target_generic;
-  std::complex<float>* src0 ;
-
-
-  unsigned int i_target;
-  target = &i_target;
-  unsigned int i_target_generic;
-  target_generic = &i_target_generic;
-  ret = posix_memalign((void**)&src0, 16, vlen << 3);
-
-  random_floats((float*)src0, vlen * 2);
-
-  printf("32fc_index_max_aligned16\n");
-
-  clock_t start, end;
-  double total;
-
-
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-    volk_32fc_index_max_aligned16_manual(target_generic, src0, vlen << 3, 
"generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic time: %f\n", total);
-
-  start = clock();
-  for(int k = 0; k < NUM_ITERS; ++k) {
-  volk_32fc_index_max_aligned16_manual(target, src0, vlen << 3, "sse3");
-  }
-
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3 time: %f\n", total);
-
-
-
-
-  printf("generic: %u, sse3: %u\n", target_generic[0], target[0]);
-  CPPUNIT_ASSERT_DOUBLES_EQUAL(target_generic[0], target[0], 1.1);
-
-
-
-  free(src0);
-}
-
-#endif /*LV_HAVE_SSE3*/
diff --git a/volk/lib/qa_32fc_index_max_aligned16.h 
b/volk/lib/qa_32fc_index_max_aligned16.h
deleted file mode 100644
index 0990bcb..0000000
--- a/volk/lib/qa_32fc_index_max_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
-#define INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_index_max_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_index_max_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_INDEX_MAX_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc 
b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
deleted file mode 100644
index daca31d..0000000
--- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.cc
+++ /dev/null
@@ -1,64 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_power_spectral_density_32f_aligned16.h>
-#include <volk/volk_32fc_power_spectral_density_32f_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse3
-
-#ifndef LV_HAVE_SSE3
-
-void qa_32fc_power_spectral_density_32f_aligned16::t1() {
-  printf("sse3 not available... no test performed\n");
-}
-
-#else
-
-void qa_32fc_power_spectral_density_32f_aligned16::t1() {
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-  const int vlen = 3201;
-  const int ITERS = 10000;
-  __VOLK_ATTR_ALIGNED(16) std::complex<float> input0[vlen];
-
-  __VOLK_ATTR_ALIGNED(16) float output_generic[vlen];
-  __VOLK_ATTR_ALIGNED(16) float output_sse3[vlen];
-
-  const float scalar = vlen;
-  const float rbw = 1.7;
-
-  float* inputLoad = (float*)input0;
-  for(int i = 0; i < 2*vlen; ++i) {
-    inputLoad[i] = (((float) (rand() - (RAND_MAX/2))) / 
static_cast<float>((RAND_MAX/2)));
-  }
-  printf("32fc_power_spectral_density_32f_aligned\n");
-
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_power_spectral_density_32f_aligned16_manual(output_generic, 
input0, scalar, rbw, vlen, "generic");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32fc_power_spectral_density_32f_aligned16_manual(output_sse3, input0, 
scalar, rbw, vlen, "sse3");
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse3_time: %f\n", total);
-
-  for(int i = 0; i < 1; ++i) {
-    //printf("inputs: %d, %d\n", input0[i*2], input0[i*2 + 1]);
-    //printf("generic... %d, ssse3... %d\n", output0[i], output1[i]);
-  }
-
-  for(int i = 0; i < vlen; ++i) {
-    //printf("%d...%d\n", output0[i], output01[i]);
-    CPPUNIT_ASSERT_DOUBLES_EQUAL(output_generic[i], output_sse3[i], 
fabs(output_generic[i]*1e-4));
-  }
-}
-
-#endif
diff --git a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h 
b/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h
deleted file mode 100644
index 26f430b..0000000
--- a/volk/lib/qa_32fc_power_spectral_density_32f_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
-#define INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_power_spectral_density_32f_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_power_spectral_density_32f_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_POWER_SPECTRAL_DENSITY_32F_ALIGNED16_H */
diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc 
b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
deleted file mode 100644
index b825c20..0000000
--- a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.cc
+++ /dev/null
@@ -1,138 +0,0 @@
-#include <volk/volk.h>
-#include <qa_32fc_x2_conjugate_dot_prod_32fc_u.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-
-
-#define assertcomplexEqual(expected, actual, delta)                    \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::real(expected), std::real(actual), 
fabs(std::real(expected)) * delta); \
-  CPPUNIT_ASSERT_DOUBLES_EQUAL (std::imag(expected), std::imag(actual), 
fabs(std::imag(expected))* delta);
-
-#define        ERR_DELTA       (1e-4)
-
-//test for sse
-
-#if LV_HAVE_SSE && LV_HAVE_64
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);      // uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
-  const int vlen = 789743;
-
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-
-
-
-  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, 
vlen * 8,  "generic");
-
-
-  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, 
"sse");
-
-  printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), 
std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-
-}
-
-
-#elif LV_HAVE_SSE && LV_HAVE_32
-
-static float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);      // uniformly (-1, 1)
-}
-
-static void
-random_floats (float *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform () * 32767;
-}
-
-
-void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
-  const int vlen = 789743;
-
-  volk_environment_init();
-  int ret;
-
-  std::complex<float>* input;
-  std::complex<float>* taps;
-
-  std::complex<float>* result_generic;
-  std::complex<float>* result;
-
-  ret = posix_memalign((void**)&input, 16, vlen << 3);
-  ret = posix_memalign((void**)&taps, 16, vlen << 3);
-  ret = posix_memalign((void**)&result_generic, 16, 8);
-  ret = posix_memalign((void**)&result, 16, 8);
-
-
-  result_generic[0] = std::complex<float>(0,0);
-  result[0] = std::complex<float>(0,0);
-
-  random_floats((float*)input, vlen * 2);
-  random_floats((float*)taps, vlen * 2);
-
-
-
-  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result_generic, input, taps, 
vlen * 8,  "generic");
-
-
-  volk_32fc_x2_conjugate_dot_prod_32fc_u_manual(result, input, taps, vlen * 8, 
"sse_32");
-
-  printf("32fc_x2_conjugate_dot_prod_32fc_u\n");
-  printf("generic: %f +i%f ... sse: %f +i%f\n", std::real(result_generic[0]), 
std::imag(result_generic[0]), std::real(result[0]), std::imag(result[0]));
-
-  assertcomplexEqual(result_generic[0], result[0], ERR_DELTA);
-
-  free(input);
-  free(taps);
-  free(result_generic);
-  free(result);
-
-}
-
-
-#else
-
-void qa_32fc_x2_conjugate_dot_prod_32fc_u::t1() {
-  printf("sse not available... no test performed\n");
-}
-
-#endif /*LV_HAVE_SSE*/
diff --git a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h 
b/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h
deleted file mode 100644
index f074024..0000000
--- a/volk/lib/qa_32fc_x2_conjugate_dot_prod_32fc_u.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H
-#define INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32fc_x2_conjugate_dot_prod_32fc_u : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32fc_x2_conjugate_dot_prod_32fc_u);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32FC_X2_CONJUGATE_DOT_PROD_32FC_U_H */
diff --git a/volk/lib/qa_32u_popcnt_aligned16.cc 
b/volk/lib/qa_32u_popcnt_aligned16.cc
deleted file mode 100644
index 5559d93..0000000
--- a/volk/lib/qa_32u_popcnt_aligned16.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_32u_popcnt_aligned16.h>
-#include <volk/volk_32u_popcnt_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_2
-
-void qa_32u_popcnt_aligned16::t1() {
-  printf("sse4.2 not available... no test performed\n");
-}
-
-#else
-
-void qa_32u_popcnt_aligned16::t1() {
-
-
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-
-  const int ITERS = 10000000;
-  __VOLK_ATTR_ALIGNED(16) uint32_t input0;
-
-  __VOLK_ATTR_ALIGNED(16) uint32_t output0;
-  __VOLK_ATTR_ALIGNED(16) uint32_t output01;
-
-    input0 = ((uint32_t) (rand() - (RAND_MAX/2)));
-    output0 = 0;
-    output01 = 0;
-
-  printf("32u_popcnt_aligned\n");
-
-  start = clock();
-  uint32_t ret = 0;
-  for(int count = 0; count < ITERS; ++count) {
-    volk_32u_popcnt_aligned16_manual(&ret, input0, "generic");
-    output0 += ret;
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  ret = 0;
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_32u_popcnt_aligned16(&ret, input0);
-    output01 += ret;
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.2_time: %f\n", total);
-
-
-  CPPUNIT_ASSERT_EQUAL(output0, output01);
-}
-
-#endif
diff --git a/volk/lib/qa_32u_popcnt_aligned16.h 
b/volk/lib/qa_32u_popcnt_aligned16.h
deleted file mode 100644
index fa1dc10..0000000
--- a/volk/lib/qa_32u_popcnt_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_32U_POPCNT_ALIGNED16_H
-#define INCLUDED_QA_32U_POPCNT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_32u_popcnt_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_32u_popcnt_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_32U_POPCNT_ALIGNED16_H */
diff --git a/volk/lib/qa_64u_popcnt_aligned16.cc 
b/volk/lib/qa_64u_popcnt_aligned16.cc
deleted file mode 100644
index 391601f..0000000
--- a/volk/lib/qa_64u_popcnt_aligned16.cc
+++ /dev/null
@@ -1,62 +0,0 @@
-#include <volk/volk_runtime.h>
-#include <volk/volk.h>
-#include <qa_64u_popcnt_aligned16.h>
-#include <volk/volk_64u_popcnt_aligned16.h>
-#include <cstdlib>
-#include <ctime>
-
-//test for sse
-
-#ifndef LV_HAVE_SSE4_2
-
-void qa_64u_popcnt_aligned16::t1() {
-  printf("sse4.2 not available... no test performed\n");
-}
-
-#else
-
-void qa_64u_popcnt_aligned16::t1() {
-
-
-  volk_runtime_init();
-
-  volk_environment_init();
-  clock_t start, end;
-  double total;
-
-  const int ITERS = 10000000;
-  __VOLK_ATTR_ALIGNED(16) uint64_t input0;
-
-  __VOLK_ATTR_ALIGNED(16) uint64_t output0;
-  __VOLK_ATTR_ALIGNED(16) uint64_t output01;
-
-    input0 = ((uint64_t) (rand() - (RAND_MAX/2)));
-    output0 = 0;
-    output01 = 0;
-
-  printf("64u_popcnt_aligned\n");
-
-  start = clock();
-  uint64_t ret = 0;
-  for(int count = 0; count < ITERS; ++count) {
-    volk_64u_popcnt_aligned16_manual(&ret, input0, "generic");
-    output0 += ret;
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("generic_time: %f\n", total);
-  start = clock();
-  ret = 0;
-  for(int count = 0; count < ITERS; ++count) {
-    get_volk_runtime()->volk_64u_popcnt_aligned16(&ret, input0);
-    output01 += ret;
-  }
-  end = clock();
-  total = (double)(end-start)/(double)CLOCKS_PER_SEC;
-  printf("sse4.2_time: %f\n", total);
-
-
-  CPPUNIT_ASSERT_EQUAL(output0, output01);
-}
-
-#endif
diff --git a/volk/lib/qa_64u_popcnt_aligned16.h 
b/volk/lib/qa_64u_popcnt_aligned16.h
deleted file mode 100644
index 217822d..0000000
--- a/volk/lib/qa_64u_popcnt_aligned16.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#ifndef INCLUDED_QA_64U_POPCNT_ALIGNED16_H
-#define INCLUDED_QA_64U_POPCNT_ALIGNED16_H
-
-#include <cppunit/extensions/HelperMacros.h>
-#include <cppunit/TestCase.h>
-
-class qa_64u_popcnt_aligned16 : public CppUnit::TestCase {
-
-  CPPUNIT_TEST_SUITE (qa_64u_popcnt_aligned16);
-  CPPUNIT_TEST (t1);
-  CPPUNIT_TEST_SUITE_END ();
-
- private:
-  void t1 ();
-};
-
-
-#endif /* INCLUDED_QA_64U_POPCNT_ALIGNED16_H */
diff --git a/volk/lib/qa_utils.cc b/volk/lib/qa_utils.cc
deleted file mode 100644
index be20ed6..0000000
--- a/volk/lib/qa_utils.cc
+++ /dev/null
@@ -1,584 +0,0 @@
-#include "qa_utils.h"
-#include <cstring>
-#include <boost/foreach.hpp>
-#include <boost/assign/list_of.hpp>
-#include <boost/tokenizer.hpp>
-#include <boost/xpressive/xpressive.hpp>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <map>
-#include <list>
-#include <ctime>
-#include <cmath>
-#include <limits>
-#include <boost/lexical_cast.hpp>
-#include <volk/volk.h>
-#include <volk/volk_cpu.h>
-#include <volk/volk_common.h>
-#include <volk/volk_malloc.h>
-#include <boost/typeof/typeof.hpp>
-#include <boost/type_traits.hpp>
-#include <stdio.h>
-
-float uniform() {
-  return 2.0 * ((float) rand() / RAND_MAX - 0.5);      // uniformly (-1, 1)
-}
-
-template <class t>
-void random_floats (t *buf, unsigned n)
-{
-  for (unsigned i = 0; i < n; i++)
-    buf[i] = uniform ();
-}
-
-void load_random_data(void *data, volk_type_t type, unsigned int n) {
-    if(type.is_complex) n *= 2;
-    if(type.is_float) {
-        if(type.size == 8) random_floats<double>((double *)data, n);
-        else random_floats<float>((float *)data, n);
-    } else {
-        float int_max = float(uint64_t(2) << (type.size*8));
-        if(type.is_signed) int_max /= 2.0;
-        for(unsigned int i=0; i<n; i++) {
-            float scaled_rand = (((float) (rand() - (RAND_MAX/2))) / 
static_cast<float>((RAND_MAX/2))) * int_max;
-            //man i really don't know how to do this in a more clever way, you 
have to cast down at some point
-            switch(type.size) {
-            case 8:
-                if(type.is_signed) ((int64_t *)data)[i] = (int64_t) 
scaled_rand;
-                else ((uint64_t *)data)[i] = (uint64_t) scaled_rand;
-            break;
-            case 4:
-                if(type.is_signed) ((int32_t *)data)[i] = (int32_t) 
scaled_rand;
-                else ((uint32_t *)data)[i] = (uint32_t) scaled_rand;
-            break;
-            case 2:
-                if(type.is_signed) ((int16_t *)data)[i] = (int16_t) 
scaled_rand;
-                else ((uint16_t *)data)[i] = (uint16_t) scaled_rand;
-            break;
-            case 1:
-                if(type.is_signed) ((int8_t *)data)[i] = (int8_t) scaled_rand;
-                else ((uint8_t *)data)[i] = (uint8_t) scaled_rand;
-            break;
-            default:
-                throw "load_random_data: no support for data size > 8 or < 1"; 
//no shenanigans here
-            }
-        }
-    }
-}
-
-static std::vector<std::string> get_arch_list(volk_func_desc_t desc) {
-    std::vector<std::string> archlist;
-
-    for(size_t i = 0; i < desc.n_impls; i++) {
-        //if(!(archs[i+1] & volk_get_lvarch())) continue; //this arch isn't 
available on this pc
-        archlist.push_back(std::string(desc.impl_names[i]));
-    }
-
-    return archlist;
-}
-
-volk_type_t volk_type_from_string(std::string name) {
-    volk_type_t type;
-    type.is_float = false;
-    type.is_scalar = false;
-    type.is_complex = false;
-    type.is_signed = false;
-    type.size = 0;
-    type.str = name;
-
-    if(name.size() < 2) throw std::string("name too short to be a datatype");
-
-    //is it a scalar?
-    if(name[0] == 's') {
-        type.is_scalar = true;
-        name = name.substr(1, name.size()-1);
-    }
-
-    //get the data size
-    size_t last_size_pos = name.find_last_of("0123456789");
-    if(last_size_pos == std::string::npos)
-      throw std::string("no size spec in type ").append(name);
-    //will throw if malformed
-    int size = boost::lexical_cast<int>(name.substr(0, last_size_pos+1));
-
-    assert(((size % 8) == 0) && (size <= 64) && (size != 0));
-    type.size = size/8; //in bytes
-
-    for(size_t i=last_size_pos+1; i < name.size(); i++) {
-        switch (name[i]) {
-        case 'f':
-            type.is_float = true;
-            break;
-        case 'i':
-            type.is_signed = true;
-            break;
-        case 'c':
-            type.is_complex = true;
-            break;
-        case 'u':
-            type.is_signed = false;
-            break;
-        default:
-            throw;
-        }
-    }
-
-    return type;
-}
-
-static void get_signatures_from_name(std::vector<volk_type_t> &inputsig,
-                                   std::vector<volk_type_t> &outputsig,
-                                   std::string name) {
-    boost::char_separator<char> sep("_");
-    boost::tokenizer<boost::char_separator<char> > tok(name, sep);
-    std::vector<std::string> toked;
-    tok.assign(name);
-    toked.assign(tok.begin(), tok.end());
-
-    assert(toked[0] == "volk");
-    toked.erase(toked.begin());
-
-    //ok. we're assuming a string in the form
-    //(sig)_(multiplier-opt)_..._(name)_(sig)_(multiplier-opt)_..._(alignment)
-
-    enum { SIDE_INPUT, SIDE_NAME, SIDE_OUTPUT } side = SIDE_INPUT;
-    std::string fn_name;
-    volk_type_t type;
-    BOOST_FOREACH(std::string token, toked) {
-        try {
-            type = volk_type_from_string(token);
-            if(side == SIDE_NAME) side = SIDE_OUTPUT; //if this is the first 
one after the name...
-
-            if(side == SIDE_INPUT) inputsig.push_back(type);
-            else outputsig.push_back(type);
-        } catch (...){
-            if(token[0] == 'x') { //it's a multiplier
-                if(side == SIDE_INPUT) assert(inputsig.size() > 0);
-                else assert(outputsig.size() > 0);
-                int multiplier = boost::lexical_cast<int>(token.substr(1, 
token.size()-1)); //will throw if invalid
-                for(int i=1; i<multiplier; i++) {
-                    if(side == SIDE_INPUT) inputsig.push_back(inputsig.back());
-                    else outputsig.push_back(outputsig.back());
-                }
-            }
-            else if(side == SIDE_INPUT) { //it's the function name, at least 
it better be
-                side = SIDE_NAME;
-                fn_name.append("_");
-                fn_name.append(token);
-            }
-            else if(side == SIDE_OUTPUT) {
-                if(token != toked.back()) throw; //the last token in the name 
is the alignment
-            }
-        }
-    }
-    //we don't need an output signature (some fn's operate on the input data, 
"in place"), but we do need at least one input!
-    assert(inputsig.size() != 0);
-
-}
-
-inline void run_cast_test1(volk_fn_1arg func, std::vector<void *> &buffs, 
unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buffs[0], vlen, arch.c_str());
-}
-
-inline void run_cast_test2(volk_fn_2arg func, std::vector<void *> &buffs, 
unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buffs[0], buffs[1], vlen, arch.c_str());
-}
-
-inline void run_cast_test3(volk_fn_3arg func, std::vector<void *> &buffs, 
unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buffs[0], buffs[1], buffs[2], vlen, arch.c_str());
-}
-
-inline void run_cast_test4(volk_fn_4arg func, std::vector<void *> &buffs, 
unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buffs[0], buffs[1], buffs[2], buffs[3], vlen, 
arch.c_str());
-}
-
-inline void run_cast_test1_s32f(volk_fn_1arg_s32f func, std::vector<void *> 
&buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
-}
-
-inline void run_cast_test2_s32f(volk_fn_2arg_s32f func, std::vector<void *> 
&buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
-}
-
-inline void run_cast_test3_s32f(volk_fn_3arg_s32f func, std::vector<void *> 
&buffs, float scalar, unsigned int vlen, unsigned int iter, std::string arch) {
-    while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, 
arch.c_str());
-}
-
-inline void run_cast_test1_s32fc(volk_fn_1arg_s32fc func, std::vector<void *> 
&buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string 
arch) {
-    while(iter--) func(buffs[0], scalar, vlen, arch.c_str());
-}
-
-inline void run_cast_test2_s32fc(volk_fn_2arg_s32fc func, std::vector<void *> 
&buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string 
arch) {
-    while(iter--) func(buffs[0], buffs[1], scalar, vlen, arch.c_str());
-}
-
-inline void run_cast_test3_s32fc(volk_fn_3arg_s32fc func, std::vector<void *> 
&buffs, lv_32fc_t scalar, unsigned int vlen, unsigned int iter, std::string 
arch) {
-    while(iter--) func(buffs[0], buffs[1], buffs[2], scalar, vlen, 
arch.c_str());
-}
-
-// This function is a nop that helps resolve GNU Radio bugs 582 and 583.
-// Without this the cast in run_volk_tests for tol_i = static_cast<int>(float 
tol)
-// won't happen on armhf (reported on cortex A9 and A15).
-void lv_force_cast_hf( int tol_i, float tol_f)
-{
-    int diff_i = 1;
-    float diff_f = 1;
-    if( diff_i > tol_i )
-        std::cout << "" ;
-    if( diff_f > tol_f )
-        std::cout << "" ;
-}
-
-template <class t>
-bool fcompare(t *in1, t *in2, unsigned int vlen, float tol) {
-    bool fail = false;
-    int print_max_errs = 10;
-    for(unsigned int i=0; i<vlen; i++) {
-        // for very small numbers we'll see round off errors due to limited
-        // precision. So a special test case...
-        if(fabs(((t *)(in1))[i]) < 1e-30) {
-            if( fabs( ((t *)(in2))[i] ) > tol )
-            {
-                fail=true;
-                if(print_max_errs-- > 0) {
-                    std::cout << "offset " << i << " in1: " << t(((t 
*)(in1))[i]) << " in2: " << t(((t *)(in2))[i]) << std::endl;
-                }
-            }
-        }
-        // the primary test is the percent different greater than given tol
-        else if(fabs(((t *)(in1))[i] - ((t *)(in2))[i])/fabs(((t *)in1)[i]) > 
tol) {
-            fail=true;
-            if(print_max_errs-- > 0) {
-                std::cout << "offset " << i << " in1: " << t(((t *)(in1))[i]) 
<< " in2: " << t(((t *)(in2))[i]) << std::endl;
-            }
-        }
-    }
-
-    return fail;
-}
-
-template <class t>
-bool ccompare(t *in1, t *in2, unsigned int vlen, float tol) {
-    bool fail = false;
-    int print_max_errs = 10;
-    for(unsigned int i=0; i<2*vlen; i+=2) {
-        t diff[2] = { in1[i] - in2[i], in1[i+1] - in2[i+1] };
-        t err  = std::sqrt(diff[0] * diff[0] + diff[1] * diff[1]);
-        t norm = std::sqrt(in1[i] * in1[i] + in1[i+1] * in1[i+1]);
-
-        // for very small numbers we'll see round off errors due to limited
-        // precision. So a special test case...
-        if (norm < 1e-30) {
-            if (err > tol)
-            {
-                fail=true;
-                if(print_max_errs-- > 0) {
-                    std::cout << "offset " << i/2 << " in1: " << in1[i] << " + 
" << in1[i+1] << "j  in2: " << in2[i] << " + " << in2[i+1] << "j" << std::endl;
-                }
-            }
-        }
-        // the primary test is the percent different greater than given tol
-        else if((err / norm) > tol) {
-            fail=true;
-            if(print_max_errs-- > 0) {
-                std::cout << "offset " << i/2 << " in1: " << in1[i] << " + " 
<< in1[i+1] << "j  in2: " << in2[i] << " + " << in2[i+1] << "j" << std::endl;
-            }
-        }
-    }
-
-    return fail;
-}
-
-template <class t>
-bool icompare(t *in1, t *in2, unsigned int vlen, unsigned int tol) {
-    bool fail = false;
-    int print_max_errs = 10;
-    for(unsigned int i=0; i<vlen; i++) {
-        if(abs(int(((t *)(in1))[i]) - int(((t *)(in2))[i])) > tol) {
-            fail=true;
-            if(print_max_errs-- > 0) {
-                std::cout << "offset " << i << " in1: " << 
static_cast<int>(t(((t *)(in1))[i])) << " in2: " << static_cast<int>(t(((t 
*)(in2))[i])) << std::endl;
-            }
-        }
-    }
-
-    return fail;
-}
-
-class volk_qa_aligned_mem_pool{
-public:
-    void *get_new(size_t size){
-        size_t alignment = volk_get_alignment();
-        void* ptr = volk_malloc(size, alignment);
-        memset(ptr, 0x00, size);
-        _mems.push_back(ptr);
-        return ptr;
-    }
-    ~volk_qa_aligned_mem_pool() {
-        for(unsigned int ii = 0; ii < _mems.size(); ++ii) {
-            volk_free(_mems[ii]);
-        }
-    }
-private: std::vector<void * > _mems;
-};
-
-bool run_volk_tests(volk_func_desc_t desc,
-                    void (*manual_func)(),
-                    std::string name,
-                    float tol,
-                    lv_32fc_t scalar,
-                    int vlen,
-                    int iter,
-                    std::vector<volk_test_results_t> *results,
-                    std::string puppet_master_name,
-                    bool benchmark_mode, 
-                    std::string kernel_regex
-                   ) {
-    boost::xpressive::sregex kernel_expression = 
boost::xpressive::sregex::compile(kernel_regex);
-    if( !boost::xpressive::regex_search(name, kernel_expression) ) {
-        // in this case we have a regex and are only looking to test one kernel
-        return false;
-    }
-    if(results) {
-        results->push_back(volk_test_results_t()); 
-        results->back().name = name;
-        results->back().vlen = vlen;
-        results->back().iter = iter;
-    }
-    std::cout << "RUN_VOLK_TESTS: " << name << "(" << vlen << "," << iter << 
")" << std::endl;
-
-    // The multiply and lv_force_cast_hf are work arounds for GNU Radio bugs 
582 and 583
-    // The bug is the casting/assignment below do not happen, which results in 
false
-    // positives when testing for errors in fcompare and icompare.
-    // Since this only happens on armhf (reported for Cortex A9 and A15) 
combined with
-    // the following fixes it is suspected to be a compiler bug.
-    // Bug 1272024 on launchpad has been filed with Linaro GCC.
-    const float tol_f = tol*1.0000001;
-    const unsigned int tol_i = static_cast<const unsigned int>(tol);
-    lv_force_cast_hf( tol_i, tol_f );
-
-    //first let's get a list of available architectures for the test
-    std::vector<std::string> arch_list = get_arch_list(desc);
-
-    if((!benchmark_mode) && (arch_list.size() < 2)) {
-        std::cout << "no architectures to test" << std::endl;
-        return false;
-    }
-
-    //something that can hang onto memory and cleanup when this function exits
-    volk_qa_aligned_mem_pool mem_pool;
-
-    //now we have to get a function signature by parsing the name
-    std::vector<volk_type_t> inputsig, outputsig;
-    get_signatures_from_name(inputsig, outputsig, name);
-
-    //pull the input scalars into their own vector
-    std::vector<volk_type_t> inputsc;
-    for(size_t i=0; i<inputsig.size(); i++) {
-        if(inputsig[i].is_scalar) {
-            inputsc.push_back(inputsig[i]);
-            inputsig.erase(inputsig.begin() + i);
-            i -= 1;
-        }
-    }
-    //for(int i=0; i<inputsig.size(); i++) std::cout << "Input: " << 
inputsig[i].str << std::endl;
-    //for(int i=0; i<outputsig.size(); i++) std::cout << "Output: " << 
outputsig[i].str << std::endl;
-    std::vector<void *> inbuffs;
-    BOOST_FOREACH(volk_type_t sig, inputsig) {
-        if(!sig.is_scalar) //we don't make buffers for scalars
-          inbuffs.push_back(mem_pool.get_new(vlen*sig.size*(sig.is_complex ? 2 
: 1)));
-    }
-    for(size_t i=0; i<inbuffs.size(); i++) {
-        load_random_data(inbuffs[i], inputsig[i], vlen);
-    }
-
-    //ok let's make a vector of vector of void buffers, which holds the 
input/output vectors for each arch
-    std::vector<std::vector<void *> > test_data;
-    for(size_t i=0; i<arch_list.size(); i++) {
-        std::vector<void *> arch_buffs;
-        for(size_t j=0; j<outputsig.size(); j++) {
-            
arch_buffs.push_back(mem_pool.get_new(vlen*outputsig[j].size*(outputsig[j].is_complex
 ? 2 : 1)));
-        }
-        for(size_t j=0; j<inputsig.size(); j++) {
-            arch_buffs.push_back(inbuffs[j]);
-        }
-        test_data.push_back(arch_buffs);
-    }
-
-    std::vector<volk_type_t> both_sigs;
-    both_sigs.insert(both_sigs.end(), outputsig.begin(), outputsig.end());
-    both_sigs.insert(both_sigs.end(), inputsig.begin(), inputsig.end());
-
-    //now run the test
-    clock_t start, end;
-    std::vector<double> profile_times;
-    for(size_t i = 0; i < arch_list.size(); i++) {
-        start = clock();
-
-        switch(both_sigs.size()) {
-            case 1:
-                if(inputsc.size() == 0) {
-                    run_cast_test1((volk_fn_1arg)(manual_func), test_data[i], 
vlen, iter, arch_list[i]);
-                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    if(inputsc[0].is_complex) {
-                        
run_cast_test1_s32fc((volk_fn_1arg_s32fc)(manual_func), test_data[i], scalar, 
vlen, iter, arch_list[i]);
-                    } else {
-                        run_cast_test1_s32f((volk_fn_1arg_s32f)(manual_func), 
test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-                    }
-                } else throw "unsupported 1 arg function >1 scalars";
-                break;
-            case 2:
-                if(inputsc.size() == 0) {
-                    run_cast_test2((volk_fn_2arg)(manual_func), test_data[i], 
vlen, iter, arch_list[i]);
-                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    if(inputsc[0].is_complex) {
-                        
run_cast_test2_s32fc((volk_fn_2arg_s32fc)(manual_func), test_data[i], scalar, 
vlen, iter, arch_list[i]);
-                    } else {
-                        run_cast_test2_s32f((volk_fn_2arg_s32f)(manual_func), 
test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-                    }
-                } else throw "unsupported 2 arg function >1 scalars";
-                break;
-            case 3:
-                if(inputsc.size() == 0) {
-                    run_cast_test3((volk_fn_3arg)(manual_func), test_data[i], 
vlen, iter, arch_list[i]);
-                } else if(inputsc.size() == 1 && inputsc[0].is_float) {
-                    if(inputsc[0].is_complex) {
-                        
run_cast_test3_s32fc((volk_fn_3arg_s32fc)(manual_func), test_data[i], scalar, 
vlen, iter, arch_list[i]);
-                    } else {
-                        run_cast_test3_s32f((volk_fn_3arg_s32f)(manual_func), 
test_data[i], scalar.real(), vlen, iter, arch_list[i]);
-                    }
-                } else throw "unsupported 3 arg function >1 scalars";
-                break;
-            case 4:
-                run_cast_test4((volk_fn_4arg)(manual_func), test_data[i], 
vlen, iter, arch_list[i]);
-                break;
-            default:
-                throw "no function handler for this signature";
-                break;
-        }
-
-        end = clock();
-        double arch_time = 1000.0 * (double)(end-start)/(double)CLOCKS_PER_SEC;
-        std::cout << arch_list[i] << " completed in " << arch_time << "ms" << 
std::endl;
-        if(results) {
-            volk_test_time_t result;
-            result.name = arch_list[i];
-            result.time = arch_time;
-            result.units = "ms";
-            results->back().results[result.name] = result;
-        }
-
-        profile_times.push_back(arch_time);
-    }
-
-    //and now compare each output to the generic output
-    //first we have to know which output is the generic one, they aren't in 
order...
-    size_t generic_offset=0;
-    for(size_t i=0; i<arch_list.size(); i++)
-        if(arch_list[i] == "generic") generic_offset=i;
-
-    //now compare
-    //if(outputsig.size() == 0) outputsig = inputsig; //a hack, i know
-
-    bool fail = false;
-    bool fail_global = false;
-    std::vector<bool> arch_results;
-    for(size_t i=0; i<arch_list.size(); i++) {
-        fail = false;
-        if(i != generic_offset) {
-            for(size_t j=0; j<both_sigs.size(); j++) {
-                if(both_sigs[j].is_float) {
-                    if(both_sigs[j].size == 8) {
-                        if (both_sigs[j].is_complex) {
-                            fail = ccompare((double *) 
test_data[generic_offset][j], (double *) test_data[i][j], vlen, tol_f);
-
-                        } else {
-                            fail = fcompare((double *) 
test_data[generic_offset][j], (double *) test_data[i][j], vlen, tol_f);
-                        }
-                    } else {
-                        if (both_sigs[j].is_complex) {
-                            fail = ccompare((float *) 
test_data[generic_offset][j], (float *) test_data[i][j], vlen, tol_f);
-                        } else {
-                            fail = fcompare((float *) 
test_data[generic_offset][j], (float *) test_data[i][j], vlen, tol_f);
-                        }
-                    }
-                } else {
-                    //i could replace this whole switch statement with a 
memcmp if i wasn't interested in printing the outputs where they differ
-                    switch(both_sigs[j].size) {
-                    case 8:
-                        if(both_sigs[j].is_signed) {
-                            fail = icompare((int64_t *) 
test_data[generic_offset][j], (int64_t *) test_data[i][j], 
vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
-                        } else {
-                            fail = icompare((uint64_t *) 
test_data[generic_offset][j], (uint64_t *) test_data[i][j], 
vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
-                        }
-                        break;
-                    case 4:
-                        if(both_sigs[j].is_signed) {
-                            fail = icompare((int32_t *) 
test_data[generic_offset][j], (int32_t *) test_data[i][j], 
vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
-                        } else {
-                            fail = icompare((uint32_t *) 
test_data[generic_offset][j], (uint32_t *) test_data[i][j], 
vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
-                        }
-                        break;
-                    case 2:
-                        if(both_sigs[j].is_signed) {
-                            fail = icompare((int16_t *) 
test_data[generic_offset][j], (int16_t *) test_data[i][j], 
vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
-                        } else {
-                            fail = icompare((uint16_t *) 
test_data[generic_offset][j], (uint16_t *) test_data[i][j], 
vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
-                        }
-                        break;
-                    case 1:
-                        if(both_sigs[j].is_signed) {
-                            fail = icompare((int8_t *) 
test_data[generic_offset][j], (int8_t *) test_data[i][j], 
vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
-                        } else {
-                            fail = icompare((uint8_t *) 
test_data[generic_offset][j], (uint8_t *) test_data[i][j], 
vlen*(both_sigs[j].is_complex ? 2 : 1), tol_i);
-                        }
-                        break;
-                    default:
-                        fail=1;
-                    }
-                }
-                if(fail) {
-                    fail_global = true;
-                    std::cout << name << ": fail on arch " << arch_list[i] << 
std::endl;
-                }
-                //fail = memcmp(outbuffs[generic_offset], outbuffs[i], 
outputsig[0].size * vlen * (outputsig[0].is_complex ? 2:1));
-            }
-        }
-        arch_results.push_back(!fail);
-    }
-
-    double best_time_a = std::numeric_limits<double>::max();
-    double best_time_u = std::numeric_limits<double>::max();
-    std::string best_arch_a = "generic";
-    std::string best_arch_u = "generic";
-    for(size_t i=0; i < arch_list.size(); i++)
-    {
-        if((profile_times[i] < best_time_u) && arch_results[i] && 
desc.impl_alignment[i] == 0)
-        {
-            best_time_u = profile_times[i];
-            best_arch_u = arch_list[i];
-        }
-        if((profile_times[i] < best_time_a) && arch_results[i])
-        {
-            best_time_a = profile_times[i];
-            best_arch_a = arch_list[i];
-        }
-    }
-
-    std::cout << "Best aligned arch: " << best_arch_a << std::endl;
-    std::cout << "Best unaligned arch: " << best_arch_u << std::endl;
-    if(results) {
-        if(puppet_master_name == "NULL") {
-            results->back().config_name = name;
-        } else {
-            results->back().config_name = puppet_master_name;
-        }
-        results->back().best_arch_a = best_arch_a;
-        results->back().best_arch_u = best_arch_u;
-    }
-
-    return fail_global;
-}
-
-
diff --git a/volk/lib/qa_utils.h b/volk/lib/qa_utils.h
deleted file mode 100644
index 7ca8b8d..0000000
--- a/volk/lib/qa_utils.h
+++ /dev/null
@@ -1,80 +0,0 @@
-#ifndef VOLK_QA_UTILS_H
-#define VOLK_QA_UTILS_H
-
-#include <cstdlib>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <map>
-#include <volk/volk.h>
-#include <volk/volk_common.h>
-
-struct volk_type_t {
-    bool is_float;
-    bool is_scalar;
-    bool is_signed;
-    bool is_complex;
-    int size;
-    std::string str;
-};
-
-volk_type_t volk_type_from_string(std::string);
-
-float uniform(void);
-void random_floats(float *buf, unsigned n);
-
-class volk_test_time_t {
-    public:
-        std::string name;
-        double time;
-        std::string units;
-};
-
-class volk_test_results_t {
-    public: 
-        std::string name;
-        std::string config_name;
-        int vlen;
-        int iter;
-        std::map<std::string, volk_test_time_t> results;
-        std::string best_arch_a;
-        std::string best_arch_u;
-};
-
-bool run_volk_tests(
-    volk_func_desc_t, 
-    void(*)(), 
-    std::string, 
-    float, 
-    lv_32fc_t, 
-    int, 
-    int, 
-    std::vector<volk_test_results_t> *results = NULL, 
-    std::string puppet_master_name = "NULL",
-    bool benchmark_mode=false, 
-    std::string kernel_regex=""
-    );
-
-
-#define VOLK_RUN_TESTS(func, tol, scalar, len, iter) \
-    BOOST_AUTO_TEST_CASE(func##_test) { \
-        BOOST_CHECK_EQUAL(run_volk_tests( \
-            func##_get_func_desc(), (void (*)())func##_manual, \
-            std::string(#func), tol, scalar, len, iter, 0, "NULL"), \
-          0); \
-    }
-#define VOLK_PROFILE(func, tol, scalar, len, iter, results, bnmode, 
kernel_regex) run_volk_tests(func##_get_func_desc(), (void (*)())func##_manual, 
std::string(#func), tol, scalar, len, iter, results, "NULL", bnmode, 
kernel_regex)
-#define VOLK_PUPPET_PROFILE(func, puppet_master_func, tol, scalar, len, iter, 
results, bnmode, kernel_regex) run_volk_tests(func##_get_func_desc(), (void 
(*)())func##_manual, std::string(#func), tol, scalar, len, iter, results, 
std::string(#puppet_master_func), bnmode, kernel_regex)
-typedef void (*volk_fn_1arg)(void *, unsigned int, const char*); //one input, 
operate in place
-typedef void (*volk_fn_2arg)(void *, void *, unsigned int, const char*);
-typedef void (*volk_fn_3arg)(void *, void *, void *, unsigned int, const 
char*);
-typedef void (*volk_fn_4arg)(void *, void *, void *, void *, unsigned int, 
const char*);
-typedef void (*volk_fn_1arg_s32f)(void *, float, unsigned int, const char*); 
//one input vector, one scalar float input
-typedef void (*volk_fn_2arg_s32f)(void *, void *, float, unsigned int, const 
char*);
-typedef void (*volk_fn_3arg_s32f)(void *, void *, void *, float, unsigned int, 
const char*);
-typedef void (*volk_fn_1arg_s32fc)(void *, lv_32fc_t, unsigned int, const 
char*); //one input vector, one scalar float input
-typedef void (*volk_fn_2arg_s32fc)(void *, void *, lv_32fc_t, unsigned int, 
const char*);
-typedef void (*volk_fn_3arg_s32fc)(void *, void *, void *, lv_32fc_t, unsigned 
int, const char*);
-
-#endif //VOLK_QA_UTILS_H
diff --git a/volk/lib/testqa.cc b/volk/lib/testqa.cc
deleted file mode 100644
index 7807ce4..0000000
--- a/volk/lib/testqa.cc
+++ /dev/null
@@ -1,126 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright 2012-2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include "qa_utils.h"
-#include <volk/volk.h>
-#include <boost/test/unit_test.hpp>
-
-//VOLK_RUN_TESTS(volk_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000);
-//VOLK_RUN_TESTS(volk_16i_branch_4_state_8, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_real_32f, 1e-5, 32768.0, 20462, 1);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_real_8i, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_16i_x2, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_16ic_s32f_deinterleave_32f_x2, 1e-4, 32768.0, 20462, 1);
-VOLK_RUN_TESTS(volk_16ic_deinterleave_real_16i, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_16ic_magnitude_16i, 1, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_16ic_s32f_magnitude_32f, 1e-5, 32768.0, 20462, 1);
-VOLK_RUN_TESTS(volk_16i_s32f_convert_32f, 1e-4, 32768.0, 20462, 1);
-VOLK_RUN_TESTS(volk_16i_convert_8i, 0, 0, 20462, 1);
-//VOLK_RUN_TESTS(volk_16i_max_star_16i, 0, 0, 20462, 10000);
-//VOLK_RUN_TESTS(volk_16i_max_star_horizontal_16i, 0, 0, 20462, 10000);
-//VOLK_RUN_TESTS(volk_16i_permute_and_scalar_add, 1e-4, 0, 2046, 1000);
-//VOLK_RUN_TESTS(volk_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 1000);
-VOLK_RUN_TESTS(volk_16u_byteswap, 0, 0, 20462, 1);
-//VOLK_RUN_TESTS(volk_16i_32fc_dot_prod_32fc, 1e-4, 0, 204602, 1);
-VOLK_RUN_TESTS(volk_32f_accumulator_s32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_add_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_32f_multiply_32fc, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_log2_32f, 1.5e-1, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_expfast_32f, 1e-1, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_pow_32f, 1e-2, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_sin_32f, 1e-6, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_cos_32f, 1e-6, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_tan_32f, 1e-6, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_atan_32f, 1e-3, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_asin_32f, 1e-3, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_acos_32f, 1e-3, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_s32f_power_32fc, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_calc_spectral_noise_floor_32f, 1e-4, 20.0, 20462, 
1);
-VOLK_RUN_TESTS(volk_32fc_s32f_atan2_32f, 1e-4, 10.0, 20462, 1);
-//VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_32fc_x2_conjugate_dot_prod_32fc, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_32f_x2, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_64f_x2, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_s32f_deinterleave_real_16i, 0, 32768, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_real_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_imag_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_deinterleave_real_64f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_dot_prod_32fc, 1e-4, 0, 204603, 1);
-VOLK_RUN_TESTS(volk_32fc_32f_dot_prod_32fc, 1e-4, 0, 204602, 1);
-VOLK_RUN_TESTS(volk_32fc_index_max_16u, 3, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_s32f_magnitude_16i, 1, 32768, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_magnitude_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_16i, 1, 32768, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_32i, 1, 1<<31, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_convert_64f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_convert_8i, 1, 128, 20462, 1);
-//VOLK_RUN_TESTS(volk_32fc_s32f_x2_power_spectral_density_32f, 1e-4, 2046, 
10000);
-VOLK_RUN_TESTS(volk_32fc_s32f_power_spectrum_32f, 1e-4, 0, 2046, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_square_dist_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_s32f_square_dist_scalar_mult_32f, 1e-4, 10, 20462, 
1);
-VOLK_RUN_TESTS(volk_32f_x2_divide_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_32f, 1e-4, 0, 204602, 1);
-VOLK_RUN_TESTS(volk_32f_x2_dot_prod_16i, 1e-4, 0, 204602, 1);
-//VOLK_RUN_TESTS(volk_32f_s32f_32f_fm_detect_32f, 1e-4, 2046, 10000);
-VOLK_RUN_TESTS(volk_32f_index_max_16u, 3, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_s32f_interleave_16ic, 1, 32767, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_interleave_32fc, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_max_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_min_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_normalize, 1e-4, 100, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_power_32f, 1e-4, 4, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_sqrt_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_stddev_32f, 1e-4, 100, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_stddev_and_mean_32f_x2, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_subtract_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x3_sum_of_poly_32f, 1e-2, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32i_x2_and_32i, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32i_s32f_convert_32f, 1e-4, 100, 20462, 1);
-VOLK_RUN_TESTS(volk_32i_x2_or_32i, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32u_byteswap, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32u_popcntpuppet_32u, 0, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_64f_convert_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_64f_x2_max_64f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_64f_x2_min_64f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_64u_byteswap, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_64u_popcntpuppet_64u, 0, 0, 2046, 10000);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_16i_x2, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_32f_x2, 1e-4, 100, 20462, 1);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_real_16i, 0, 256, 20462, 1);
-VOLK_RUN_TESTS(volk_8ic_s32f_deinterleave_real_32f, 1e-4, 100, 20462, 1);
-VOLK_RUN_TESTS(volk_8ic_deinterleave_real_8i, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_8ic_x2_multiply_conjugate_16ic, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_8ic_x2_s32f_multiply_conjugate_32fc, 1e-4, 100, 20462, 1);
-VOLK_RUN_TESTS(volk_8i_convert_16i, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_8i_s32f_convert_32f, 1e-4, 100, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_32fc, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_x2_multiply_conjugate_32fc, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_conjugate_32fc, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_x2_multiply_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_s32fc_multiply_32fc, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_s32f_multiply_32f, 1e-4, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32fc_s32fc_rotatorpuppet_32fc, 1e-3, 
(lv_32fc_t)lv_cmake(0.953939201, 0.3), 20462, 1);
-VOLK_RUN_TESTS(volk_8u_conv_k7_r2puppet_8u, 0, 0, 2060, 1);
-VOLK_RUN_TESTS(volk_32f_invsqrt_32f, 1e-2, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_binary_slicer_32i, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_binary_slicer_8i, 0, 0, 20462, 1);
-VOLK_RUN_TESTS(volk_32f_tanh_32f, 1e-6, 0, 20462, 1);
diff --git a/volk/lib/volk_malloc.c b/volk/lib/volk_malloc.c
deleted file mode 100644
index d5e1709..0000000
--- a/volk/lib/volk_malloc.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/* -*- c -*- */
-/*
- * Copyright 2014 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <pthread.h>
-#include <volk/volk_malloc.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-/*
- * For #defines used to determine support for allocation functions,
- * see: http://linux.die.net/man/3/aligned_alloc
-*/
-
-// Disabling use of aligned_alloc. This function requires that size be
-// a multiple of alignment, which is too restrictive for many uses of
-// VOLK.
-
-//// If we are using C11 standard, use the aligned_alloc
-//#ifdef _ISOC11_SOURCE
-//
-//void *volk_malloc(size_t size, size_t alignment)
-//{
-//  void *ptr = aligned_alloc(alignment, size);
-//  if(ptr == NULL) {
-//    fprintf(stderr, "VOLK: Error allocating memory (aligned_alloc)\n");
-//  }
-//  return ptr;
-//}
-//
-//void volk_free(void *ptr)
-//{
-//  free(ptr);
-//}
-//
-//#else // _ISOC11_SOURCE
-
-// Otherwise, test if we are a POSIX or X/Open system
-// This only has a restriction that alignment be a power of 2.
-#if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || HAVE_POSIX_MEMALIGN
-
-void *volk_malloc(size_t size, size_t alignment)
-{
-  void *ptr;
-  int err = posix_memalign(&ptr, alignment, size);
-  if(err == 0) {
-    return ptr;
-  }
-  else {
-    fprintf(stderr, "VOLK: Error allocating memory (posix_memalign: %d)\n", 
err);
-    return NULL;
-  }
-}
-
-void volk_free(void *ptr)
-{
-  free(ptr);
-}
-
-// _aligned_malloc has no restriction on size,
-// available on Windows since Visual C++ 2005
-#elif _MSC_VER >= 1400
-
-void *volk_malloc(size_t size, size_t alignment)
-{
-  void *ptr = _aligned_malloc(size, alignment);
-  if(ptr == NULL) {
-    fprintf(stderr, "VOLK: Error allocating memory (_aligned_malloc)\n");
-  }
-  return ptr;
-}
-
-void volk_free(void *ptr)
-{
-  _aligned_free(ptr);
-}
-
-// No standard handlers; we'll do it ourselves.
-#else // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || 
HAVE_POSIX_MEMALIGN
-
-struct block_info
-{
-  void *real;
-};
-
-void *
-volk_malloc(size_t size, size_t alignment)
-{
-  void *real, *user;
-  struct block_info *info;
-
-  /* At least align to sizeof our struct */
-  if (alignment < sizeof(struct block_info))
-    alignment = sizeof(struct block_info);
-
-  /* Alloc */
-  real = malloc(size + (2 * alignment - 1));
-
-  /* Get pointer to the various zones */
-  user = (void *)((((uintptr_t) real) + sizeof(struct block_info) + alignment 
- 1) & ~(alignment - 1));
-  info = (struct block_info *)(((uintptr_t)user) - sizeof(struct block_info));
-
-  /* Store the info for the free */
-  info->real = real;
-
-  /* Return pointer to user */
-  return user;
-}
-
-void
-volk_free(void *ptr)
-{
-  struct block_info *info;
-
-  /* Get the real pointer */
-  info = (struct block_info *)(((uintptr_t)ptr) - sizeof(struct block_info));
-
-  /* Release real pointer */
-  free(info->real);
-}
-
-#endif // _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || 
HAVE_POSIX_MEMALIGN
-
-//#endif // _ISOC11_SOURCE
diff --git a/volk/lib/volk_prefs.c b/volk/lib/volk_prefs.c
deleted file mode 100644
index f787b5e..0000000
--- a/volk/lib/volk_prefs.c
+++ /dev/null
@@ -1,50 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <volk/volk_prefs.h>
-
-//#if defined(_WIN32)
-//#include <Windows.h>
-//#endif
-
-void volk_get_config_path(char *path)
-{
-    const char *suffix = "/.volk/volk_config";
-    char *home = NULL;
-    if (home == NULL) home = getenv("HOME");
-    if (home == NULL) home = getenv("APPDATA");
-    if (home == NULL){
-        path = NULL;
-        return;
-    }
-    strcpy(path, home);
-    strcat(path, suffix);
-}
-
-size_t volk_load_preferences(volk_arch_pref_t **prefs_res)
-{
-    FILE *config_file;
-    char path[512], line[512];
-    size_t n_arch_prefs = 0;
-    volk_arch_pref_t *prefs = NULL;
-
-    //get the config path
-    volk_get_config_path(path);
-    if (path == NULL) return n_arch_prefs; //no prefs found
-    config_file = fopen(path, "r");
-    if(!config_file) return n_arch_prefs; //no prefs found
-
-    //reset the file pointer and write the prefs into volk_arch_prefs
-    while(fgets(line, sizeof(line), config_file) != NULL)
-    {
-        prefs = (volk_arch_pref_t *) realloc(prefs, (n_arch_prefs+1) * 
sizeof(*prefs));
-        volk_arch_pref_t *p = prefs + n_arch_prefs;
-        if(sscanf(line, "%s %s %s", p->name, p->impl_a, p->impl_u) == 3 && 
!strncmp(p->name, "volk_", 5))
-        {
-            n_arch_prefs++;
-        }
-    }
-    fclose(config_file);
-    *prefs_res = prefs;
-    return n_arch_prefs;
-}
diff --git a/volk/lib/volk_rank_archs.c b/volk/lib/volk_rank_archs.c
deleted file mode 100644
index 0e98ac0..0000000
--- a/volk/lib/volk_rank_archs.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <volk_rank_archs.h>
-#include <volk/volk_prefs.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#if __GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ >= 4
-    #define __popcnt __builtin_popcount
-#else
-    inline unsigned __popcnt(unsigned num)
-    {
-        unsigned pop = 0;
-        while(num)
-        {
-            if (num & 0x1) pop++;
-            num >>= 1;
-        }
-        return pop;
-    }
-#endif
-
-int volk_get_index(
-    const char *impl_names[], //list of implementations by name
-    const size_t n_impls,     //number of implementations available
-    const char *impl_name     //the implementation name to find
-){
-    unsigned int i;
-    for (i = 0; i < n_impls; i++) {
-        if(!strncmp(impl_names[i], impl_name, 20)) {
-            return i;
-        }
-    }
-    //TODO return -1;
-    //something terrible should happen here
-    printf("Volk warning: no arch found, returning generic impl\n");
-    return volk_get_index(impl_names, n_impls, "generic"); //but we'll fake it 
for now
-}
-
-int volk_rank_archs(
-    const char *kern_name,    //name of the kernel to rank
-    const char *impl_names[], //list of implementations by name
-    const int* impl_deps,     //requirement mask per implementation
-    const bool* alignment,    //alignment status of each implementation
-    size_t n_impls,            //number of implementations available
-    const bool align          //if false, filter aligned implementations
-){
-  size_t i;
-  static volk_arch_pref_t *volk_arch_prefs;
-  static size_t n_arch_prefs = 0;
-  static int prefs_loaded = 0;
-  if(!prefs_loaded) {
-      n_arch_prefs = volk_load_preferences(&volk_arch_prefs);
-      prefs_loaded = 1;
-  }
-
-  // If we've defined VOLK_GENERIC to be anything, always return the
-  // 'generic' kernel. Used in GR's QA code.
-  char *gen_env = getenv("VOLK_GENERIC");
-  if(gen_env) {
-    return volk_get_index(impl_names, n_impls, "generic");
-  }
-
-    //now look for the function name in the prefs list
-    for(i = 0; i < n_arch_prefs; i++)
-    {
-        if(!strncmp(kern_name, volk_arch_prefs[i].name, 
sizeof(volk_arch_prefs[i].name))) //found it
-        {
-            const char *impl_name = align? volk_arch_prefs[i].impl_a : 
volk_arch_prefs[i].impl_u;
-            return volk_get_index(impl_names, n_impls, impl_name);
-        }
-    }
-
-    //return the best index with the largest deps
-    size_t best_index_a = 0;
-    size_t best_index_u = 0;
-    int best_value_a = -1;
-    int best_value_u = -1;
-    for(i = 0; i < n_impls; i++)
-    {
-        const signed val = __popcnt(impl_deps[i]);
-        if (alignment[i] && val > best_value_a)
-        {
-            best_index_a = i;
-            best_value_a = val;
-        }
-        if (!alignment[i] && val > best_value_u)
-        {
-            best_index_u = i;
-            best_value_u = val;
-        }
-    }
-
-    //when align and we found a best aligned, use it
-    if (align && best_value_a != -1) return best_index_a;
-
-    //otherwise return the best unaligned
-    return best_index_u;
-}
diff --git a/volk/lib/volk_rank_archs.h b/volk/lib/volk_rank_archs.h
deleted file mode 100644
index b3bf8ff..0000000
--- a/volk/lib/volk_rank_archs.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_RANK_ARCHS_H
-#define INCLUDED_VOLK_RANK_ARCHS_H
-
-#include <stdlib.h>
-#include <stdbool.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int volk_get_index(
-    const char *impl_names[], //list of implementations by name
-    const size_t n_impls,     //number of implementations available
-    const char *impl_name     //the implementation name to find
-);
-
-int volk_rank_archs(
-    const char *kern_name,    //name of the kernel to rank
-    const char *impl_names[], //list of implementations by name
-    const int* impl_deps,     //requirement mask per implementation
-    const bool* alignment,    //alignment status of each implementation
-    size_t n_impls,            //number of implementations available
-    const bool align          //if false, filter aligned implementations
-);
-
-#ifdef __cplusplus
-}
-#endif
-#endif /*INCLUDED_VOLK_RANK_ARCHS_H*/
diff --git a/volk/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc 
b/volk/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc
deleted file mode 100644
index 76faa93..0000000
--- a/volk/orc/volk_16ic_deinterleave_16i_x2_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_16ic_deinterleave_16i_x2_a_orc_impl
-.dest 2 idst
-.dest 2 qdst
-.source 4 src
-splitlw qdst, idst, src
diff --git a/volk/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc 
b/volk/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc
deleted file mode 100644
index 8db49fd..0000000
--- a/volk/orc/volk_16ic_deinterleave_real_8i_a_orc_impl.orc
+++ /dev/null
@@ -1,6 +0,0 @@
-.function volk_16ic_deinterleave_real_8i_a_orc_impl
-.dest 1 dst
-.source 4 src
-.temp 2 iw
-select0lw iw, src
-convhwb dst, iw
diff --git a/volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc 
b/volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc
deleted file mode 100644
index fbaebc4..0000000
--- a/volk/orc/volk_16ic_magnitude_16i_a_orc_impl.orc
+++ /dev/null
@@ -1,23 +0,0 @@
-.function volk_16ic_magnitude_16i_a_orc_impl
-.source 4 src
-.dest 2 dst
-.floatparam 4 scalar
-.temp 8 iql
-.temp 8 iqf
-.temp 8 prodiqf
-.temp 4 qf
-.temp 4 if
-.temp 4 sumf
-.temp 4 rootf
-.temp 4 rootl
-
-x2 convswl iql, src
-x2 convlf iqf, iql
-x2 divf iqf, iqf, scalar
-x2 mulf prodiqf, iqf, iqf
-splitql qf, if, prodiqf
-addf sumf, if, qf
-sqrtf rootf, sumf
-mulf rootf, rootf, scalar
-convfl rootl, rootf
-convlw dst, rootl
diff --git a/volk/orc/volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl.orc 
b/volk/orc/volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl.orc
deleted file mode 100644
index fd8915d..0000000
--- a/volk/orc/volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl.orc
+++ /dev/null
@@ -1,12 +0,0 @@
-.function volk_16ic_s32f_deinterleave_32f_x2_a_orc_impl
-.dest 4 idst
-.dest 4 qdst
-.source 4 src
-.floatparam 4 scalar
-.temp 8 iql
-.temp 8 iqf
-
-x2 convswl iql, src
-x2 convlf iqf, iql
-x2 divf iqf, iqf, scalar
-splitql qdst, idst, iqf
diff --git a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc 
b/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
deleted file mode 100644
index 66fef7d..0000000
--- a/volk/orc/volk_16sc_magnitude_32f_aligned16_orc_impl.orc
+++ /dev/null
@@ -1,25 +0,0 @@
-.function volk_16ic_magnitude_32f_a_orc_impl
-.source 4 src
-.dest 4 dst
-.floatparam 4 scalar
-.temp 4 reall
-.temp 4 imagl
-.temp 2 reals
-.temp 2 imags
-.temp 4 realf
-.temp 4 imagf
-.temp 4 sumf
-
-
-
-splitlw reals, imags, src
-convswl reall, reals
-convswl imagl, imags
-convlf realf, reall
-convlf imagf, imagl
-divf realf, realf, scalar
-divf imagf, imagf, scalar
-mulf realf, realf, realf
-mulf imagf, imagf, imagf
-addf sumf, realf, imagf
-sqrtf dst, sumf
diff --git a/volk/orc/volk_16u_byteswap_a_orc_impl.orc 
b/volk/orc/volk_16u_byteswap_a_orc_impl.orc
deleted file mode 100644
index b96ba84..0000000
--- a/volk/orc/volk_16u_byteswap_a_orc_impl.orc
+++ /dev/null
@@ -1,3 +0,0 @@
-.function volk_16u_byteswap_a_orc_impl
-.dest 2 dst
-swapw dst, dst
diff --git a/volk/orc/volk_32f_s32f_multiply_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_s32f_multiply_32f_a_orc_impl.orc
deleted file mode 100644
index ea23fc0..0000000
--- a/volk/orc/volk_32f_s32f_multiply_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32f_s32f_multiply_32f_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.floatparam 4 scalar
-mulf dst, src1, scalar
diff --git a/volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc 
b/volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc
deleted file mode 100644
index 986fdf6..0000000
--- a/volk/orc/volk_32f_s32f_normalize_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32f_s32f_normalize_a_orc_impl
-.source 4 src1
-.floatparam 4 invscalar
-.dest 4 dst
-mulf dst, src1, invscalar
diff --git a/volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc
deleted file mode 100644
index f339b11..0000000
--- a/volk/orc/volk_32f_sqrt_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,4 +0,0 @@
-.function volk_32f_sqrt_32f_a_orc_impl
-.source 4 src
-.dest 4 dst
-sqrtf dst, src
diff --git a/volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc
deleted file mode 100644
index 450cc6a..0000000
--- a/volk/orc/volk_32f_x2_add_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32f_x2_add_32f_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.source 4 src2
-addf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc
deleted file mode 100644
index ee3b61b..0000000
--- a/volk/orc/volk_32f_x2_divide_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32f_x2_divide_32f_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.source 4 src2
-divf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc
deleted file mode 100644
index b367f30..0000000
--- a/volk/orc/volk_32f_x2_dot_prod_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,6 +0,0 @@
-.function volk_32f_x2_dot_prod_32f_a_orc_impl
-.source 4 src1
-.source 4 src2
-.dest 4 dst
-.accumulator 4 accum
-addf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc
deleted file mode 100644
index 7252016..0000000
--- a/volk/orc/volk_32f_x2_max_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32f_x2_max_32f_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.source 4 src2
-maxf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc
deleted file mode 100644
index a71ed82..0000000
--- a/volk/orc/volk_32f_x2_min_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32f_x2_min_32f_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.source 4 src2
-minf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc
deleted file mode 100644
index c17d539..0000000
--- a/volk/orc/volk_32f_x2_multiply_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32f_x2_multiply_32f_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.source 4 src2
-mulf dst, src1, src2
diff --git a/volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc 
b/volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc
deleted file mode 100644
index b3b0f25..0000000
--- a/volk/orc/volk_32f_x2_subtract_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32f_x2_subtract_32f_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.source 4 src2
-subf dst, src1, src2
diff --git a/volk/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc 
b/volk/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc
deleted file mode 100644
index aa82699..0000000
--- a/volk/orc/volk_32fc_32f_multiply_32fc_a_orc_impl.orc
+++ /dev/null
@@ -1,7 +0,0 @@
-.function volk_32fc_32f_multiply_32fc_a_orc_impl
-.source 8 src1
-.source 4 src2
-.dest 8 dst
-.temp 8 tmp
-mergelq tmp, src2, src2
-x2 mulf dst, src1, tmp
diff --git a/volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc 
b/volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc
deleted file mode 100644
index 032ab2b..0000000
--- a/volk/orc/volk_32fc_magnitude_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,13 +0,0 @@
-.function volk_32fc_magnitude_32f_a_orc_impl
-.source 8 src
-.dest 4 dst
-.temp 8 iqf
-.temp 8 prodiqf
-.temp 4 qf
-.temp 4 if
-.temp 4 sumf
-
-x2 mulf prodiqf, src, src
-splitql qf, if, prodiqf
-addf sumf, if, qf
-sqrtf dst, sumf
diff --git a/volk/orc/volk_32fc_s32f_magnitude_16i_a_orc_impl.orc 
b/volk/orc/volk_32fc_s32f_magnitude_16i_a_orc_impl.orc
deleted file mode 100644
index d3bf789..0000000
--- a/volk/orc/volk_32fc_s32f_magnitude_16i_a_orc_impl.orc
+++ /dev/null
@@ -1,23 +0,0 @@
-.function volk_32fc_s32f_magnitude_16i_a_orc_impl
-.source 8 src
-.dest 2 dst
-.floatparam 4 scalar
-.temp 8 iqf
-.temp 8 prodiqf
-.temp 4 qf
-.temp 4 if
-.temp 4 sumf
-.temp 4 rootf
-.temp 4 rootl
-#.temp 4 maskl
-
-x2 mulf prodiqf, src, src
-splitql qf, if, prodiqf
-addf sumf, if, qf
-sqrtf rootf, sumf
-mulf rootf, rootf, scalar
-#cmpltf maskl, 32768.0, rootf
-#andl maskl, maskl, 0x80000000
-#orl rootf, rootf, maskl
-convfl rootl, rootf
-convsuslw dst, rootl
diff --git a/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc 
b/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc
deleted file mode 100644
index 2577e03..0000000
--- a/volk/orc/volk_32fc_s32fc_multiply_32fc_a_orc_impl.orc
+++ /dev/null
@@ -1,18 +0,0 @@
-.function volk_32fc_s32fc_multiply_32fc_a_orc_impl
-.source 8 src1
-.floatparam 8 scalar
-.dest 8 dst
-.temp 8 iqprod
-.temp 4 real
-.temp 4 imag
-.temp 4 ac
-.temp 4 bd
-.temp 8 swapped
-x2 mulf iqprod, src1, scalar
-splitql bd, ac, iqprod
-subf real, ac, bd
-swaplq swapped, src1
-x2 mulf iqprod, swapped, scalar
-splitql bd, ac, iqprod
-addf imag, ac, bd
-mergelq dst, real, imag
diff --git a/volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc 
b/volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc
deleted file mode 100644
index cb8a12d..0000000
--- a/volk/orc/volk_32fc_x2_multiply_32fc_a_orc_impl.orc
+++ /dev/null
@@ -1,18 +0,0 @@
-.function volk_32fc_x2_multiply_32fc_a_orc_impl
-.source 8 src1
-.source 8 src2
-.dest 8 dst
-.temp 8 iqprod
-.temp 4 real
-.temp 4 imag
-.temp 4 ac
-.temp 4 bd
-.temp 8 swapped
-x2 mulf iqprod, src1, src2
-splitql bd, ac, iqprod
-subf real, ac, bd
-swaplq swapped, src1
-x2 mulf iqprod, swapped, src2
-splitql bd, ac, iqprod
-addf imag, ac, bd
-mergelq dst, real, imag
diff --git a/volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc 
b/volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc
deleted file mode 100644
index 1845e46..0000000
--- a/volk/orc/volk_32i_x2_and_32i_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32i_x2_and_32i_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.source 4 src2
-andl dst, src1, src2
diff --git a/volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc 
b/volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc
deleted file mode 100644
index 004663f..0000000
--- a/volk/orc/volk_32i_x2_or_32i_a_orc_impl.orc
+++ /dev/null
@@ -1,5 +0,0 @@
-.function volk_32i_x2_or_32i_a_orc_impl
-.dest 4 dst
-.source 4 src1
-.source 4 src2
-orl dst, src1, src2
diff --git a/volk/orc/volk_8i_convert_16i_a_orc_impl.orc 
b/volk/orc/volk_8i_convert_16i_a_orc_impl.orc
deleted file mode 100644
index 17198bf..0000000
--- a/volk/orc/volk_8i_convert_16i_a_orc_impl.orc
+++ /dev/null
@@ -1,6 +0,0 @@
-.function volk_8i_convert_16i_a_orc_impl
-.source 1 src
-.dest 2 dst
-.temp 2 tmp
-convsbw tmp, src
-shlw dst, tmp, 8
diff --git a/volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc 
b/volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc
deleted file mode 100644
index ad54fb1..0000000
--- a/volk/orc/volk_8i_s32f_convert_32f_a_orc_impl.orc
+++ /dev/null
@@ -1,11 +0,0 @@
-.function volk_8i_s32f_convert_32f_a_orc_impl
-.source 1 src
-.dest 4 dst
-.floatparam 4 scalar
-.temp 4 flsrc
-.temp 4 lsrc
-.temp 2 ssrc
-convsbw ssrc, src
-convswl lsrc, ssrc
-convlf flsrc, lsrc
-mulf dst, flsrc, scalar
diff --git a/volk/python/volk_modtool/CMakeLists.txt 
b/volk/python/volk_modtool/CMakeLists.txt
deleted file mode 100644
index cf0ddce..0000000
--- a/volk/python/volk_modtool/CMakeLists.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright 2013 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-
-########################################################################
-# Install python files and apps
-########################################################################
-include(GrPython)
-
-VOLK_PYTHON_INSTALL(
-    FILES
-    __init__.py
-    cfg.py
-    volk_modtool_generate.py
-    DESTINATION ${VOLK_PYTHON_DIR}/volk_modtool
-    COMPONENT "volk"
-)
-
-VOLK_PYTHON_INSTALL(
-    PROGRAMS
-    volk_modtool
-    DESTINATION ${VOLK_RUNTIME_DIR}
-    COMPONENT "volk"
-)
diff --git a/volk/python/volk_modtool/README b/volk/python/volk_modtool/README
deleted file mode 100644
index 5445ed9..0000000
--- a/volk/python/volk_modtool/README
+++ /dev/null
@@ -1,114 +0,0 @@
-The volk_modtool tool is installed along with VOLK as a way of helping
-to construct, add to, and interogate the VOLK library or companion
-libraries.
-
-volk_modtool is installed into $prefix/bin.
-
-VOLK modtool enables creating standalone (out-of-tree) VOLK modules
-and provides a few tools for sharing VOLK kernels between VOLK
-modules.  If you need to design or work with VOLK kernels away from
-the canonical VOLK library, this is the tool.  If you need to tailor
-your own VOLK library for whatever reason, this is the tool.
-
-The canonical VOLK library installs a volk.h and a libvolk.so.  Your
-own library will install volk_$name.h and libvolk_$name.so.  Ya Gronk?
-Good.
-
-There isn't a substantial difference between the canonical VOLK
-module and any other VOLK module.  They're all peers.  Any module
-created via VOLK modtool will come complete with a default
-volk_modtool.cfg file associating the module with the base from which
-it came, its distinctive $name and its destination (or path).  These
-values (created from user input if VOLK modtool runs without a
-user-supplied config file or a default config file) serve as default
-values for some VOLK modtool actions.  It's more or less intended for
-the user to change directories to the top level of a created VOLK
-module and then run volk_modtool to take advantage of the values
-stored in the default volk_modtool.cfg file.
-
-Apart from creating new VOLK modules, VOLK modtool allows you to list
-the names of kernels in other modules, list the names of kernels in
-the current module, add kernels from another module into the current
-module, and remove kernels from the current module.  When moving
-kernels between modules, VOLK modtool does its best to keep the qa
-and profiling code for those kernels intact.  If the base has a test
-or a profiling call for some kernel, those calls will follow the
-kernel when VOLK modtool adds that kernel.  If QA or profiling
-requires a puppet kernel, the puppet kernel will follow the original
-kernel when VOLK modtool adds that original kernel.  VOLK modtool
-respects puppets.
-
-======================================================================
-
-Installing a new VOLK Library:
-
-Run the command "volk_modtool -i". This will ask you three questions:
-
-  name: // the name to give your VOLK library: volk_<name>
-  destination: // directory new source tree is built under -- must exists.
-               // It will create <directory>/volk_<name>
-  base: // the directory containing the original VOLK source code
-
-The name provided must be alphanumeric (and cannot start with a
-number). No special characters including dashes and underscores are
-allowed.
-
-This will build a new skeleton directory in the destination provided
-with the name volk_<name>. It will contain the necessary structure to
-build:
-
-  mkdir build
-  cd build
-  cmake -DCMAKE_INSTALL_PREFIX=/opt/volk ../
-  make
-  sudo make install
-
-Right now, the library is empty and contains no kernels. Kernels can
-be added from another VOLK library using the '-a' option. If not
-specified, the kernel will be extracted from the base VOLK
-directory. Using the '-b' allows us to specify another VOLK library to
-use for this purpose.
-
-  volk_modtool -a -n 32fc_x2_conjugate_dot_prod_32fc
-
-This will put the code for the new kernel into
-<destination>/volk_<name>/kernels/volk_<name>/
-
-Other kernels must be added by hand. See the following webpages for
-more information about creating VOLK kernels:
-  http://gnuradio.org/doc/doxygen/volk_guide.html
-  http://gnuradio.org/redmine/projects/gnuradio/wiki/Volk
-
-
-======================================================================
-
-OPTIONS
-
-Options for Adding and Removing Kernels:
-  -a, --add_kernel
-       Add kernel from existing VOLK module. Uses the base VOLK module
-       unless -b is used. Use -n to specify the kernel name.
-       Requires: -n.
-       Optional: -b
-
-  -A, --add_all_kernels
-       Add all kernels from existing VOLK module. Uses the base VOLK
-       module unless -b is used.
-       Optional: -b
-
-  -x, --remove_kernel
-       Remove kernel from module.
-       Required: -n.
-       Optional: -b
-
-Options for Listing Kernels:
-  -l, --list
-       Lists all kernels available in the base VOLK module.
-
-  -k, --kernels
-       Lists all kernels in this VOLK module.
-
-  -r, --remote-list
-       Lists all kernels in another VOLK module that is specified
-       using the -b option.
-
diff --git a/volk/python/volk_modtool/__init__.py 
b/volk/python/volk_modtool/__init__.py
deleted file mode 100644
index e66bb0a..0000000
--- a/volk/python/volk_modtool/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2013 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-#
-
-from cfg import volk_modtool_config
-from volk_modtool_generate import volk_modtool
diff --git a/volk/python/volk_modtool/cfg.py b/volk/python/volk_modtool/cfg.py
deleted file mode 100644
index a7eb32a..0000000
--- a/volk/python/volk_modtool/cfg.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2013 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-#
-
-import ConfigParser
-import sys
-import os
-import exceptions
-import re
-
-
-class volk_modtool_config:
-    def key_val_sub(self, num, stuff, section):
-        return re.sub('\$' + 'k' + str(num), stuff[num][0], (re.sub('\$' + 
str(num), stuff[num][1], section[1][num])));
-
-    def verify(self):
-        for i in self.verification:
-            self.verify_section(i)
-    def remap(self):
-        for i in self.remapification:
-            self.verify_section(i)
-
-    def verify_section(self, section):
-        stuff = self.cfg.items(section[0])
-        for i in range(len(section[1])):
-            eval(self.key_val_sub(i, stuff, section))
-            try:
-               val = eval(self.key_val_sub(i, stuff, section))
-               if val == False:
-                   raise exceptions.ValueError
-            except ValueError:
-                raise exceptions.ValueError('Verification function returns 
False... key:%s, val:%s'%(stuff[i][0], stuff[i][1]))
-            except:
-                raise exceptions.IOError('bad configuration... key:%s, 
val:%s'%(stuff[i][0], stuff[i][1]))
-
-
-    def __init__(self, cfg=None):
-        self.config_name = 'config'
-        self.config_defaults = ['name', 'destination', 'base']
-        self.config_defaults_remap = ['1',
-                                      'self.cfg.set(self.config_name, \'$k1\', 
os.path.realpath(os.path.expanduser(\'$1\')))',
-                                      'self.cfg.set(self.config_name, \'$k2\', 
os.path.realpath(os.path.expanduser(\'$2\')))']
-
-        self.config_defaults_verify = ['re.match(\'[a-zA-Z0-9]+$\', \'$0\')',
-                                       'os.path.exists(\'$1\')',
-                                       'os.path.exists(\'$2\')']
-        self.remapification = [(self.config_name, self.config_defaults_remap)]
-        self.verification = [(self.config_name, self.config_defaults_verify)]
-        default = os.path.join(os.getcwd(), 'volk_modtool.cfg')
-        icfg = ConfigParser.RawConfigParser()
-        if cfg:
-            icfg.read(cfg)
-        elif os.path.exists(default):
-            icfg.read(default)
-        else:
-            print "Initializing config file..."
-            icfg.add_section(self.config_name)
-            for kn in self.config_defaults:
-                rv = raw_input("%s: "%(kn))
-                icfg.set(self.config_name, kn, rv)
-        self.cfg = icfg
-        self.remap()
-        self.verify()
-
-
-
-    def read_map(self, name, inp):
-        if self.cfg.has_section(name):
-            self.cfg.remove_section(name)
-        self.cfg.add_section(name)
-        for i in inp:
-            self.cfg.set(name, i, inp[i])
-
-    def get_map(self, name):
-        retval = {}
-        stuff = self.cfg.items(name)
-        for i in stuff:
-            retval[i[0]] = i[1]
-        return retval
-
-
-
-
-
-
-
diff --git a/volk/python/volk_modtool/volk_modtool 
b/volk/python/volk_modtool/volk_modtool
deleted file mode 100755
index 9f31b9e..0000000
--- a/volk/python/volk_modtool/volk_modtool
+++ /dev/null
@@ -1,128 +0,0 @@
-#!/usr/bin/env python
-#
-# Copyright 2013 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-#
-
-from volk_modtool import volk_modtool, volk_modtool_config
-from optparse import OptionParser, OptionGroup
-
-import exceptions
-import os
-import sys
-
-if __name__ == '__main__':
-    parser = OptionParser();
-    actions = OptionGroup(parser, 'Actions');
-    actions.add_option('-i', '--install', action='store_true',
-                       help='Create a new volk module.')
-    parser.add_option('-b', '--base_path', action='store', default=None,
-                      help='Base path for action. By default, volk_modtool.cfg 
loads this value.')
-    parser.add_option('-n', '--kernel_name', action='store', default=None,
-                      help='Kernel name for action. No default')
-    parser.add_option('-c', '--config', action='store', dest='config_file', 
default=None,
-                      help='Config file for volk_modtool.  By default, 
volk_modtool.cfg in the local directory will be used/created.')
-    actions.add_option('-a', '--add_kernel', action='store_true',
-                       help='Add kernel from existing volk module. Requires: 
-n. Optional: -b')
-    actions.add_option('-A', '--add_all_kernels', action='store_true',
-                       help='Add all kernels from existing volk module. 
Optional: -b')
-    actions.add_option('-x', '--remove_kernel', action='store_true',
-                       help='Remove kernel from module. Required: -n. 
Optional: -b')
-    actions.add_option('-l', '--list', action='store_true',
-                       help='List all kernels in the base.')
-    actions.add_option('-k', '--kernels', action='store_true',
-                       help='List all kernels in the module.')
-    actions.add_option('-r', '--remote_list', action='store_true',
-                       help='List all available kernels in remote volk module. 
Requires: -b.')
-    actions.add_option('-m', '--moo', action='store_true',
-                       help='Have you mooed today?')
-    parser.add_option_group(actions)
-
-    (options, args) = parser.parse_args();
-    if len(sys.argv) < 2:
-        parser.print_help()
-
-    elif options.moo:
-        print "         (__)    "
-        print "         (oo)    "
-        print "   /------\/     "
-        print "  / |    ||      "
-        print " *  /\---/\      "
-        print "    ~~   ~~      "
-
-    else:
-        my_cfg = volk_modtool_config(options.config_file);
-
-        my_modtool = volk_modtool(my_cfg.get_map(my_cfg.config_name));
-
-
-        if options.install:
-            my_modtool.make_module_skeleton();
-            my_modtool.write_default_cfg(my_cfg.cfg);
-
-
-        if options.add_kernel:
-            if not options.kernel_name:
-                raise exceptions.IOError("This action requires the -n 
option.");
-            else:
-                name = options.kernel_name;
-            if options.base_path:
-                base = options.base_path;
-            else:
-                base = my_cfg.cfg.get(my_cfg.config_name, 'base');
-                my_modtool.import_kernel(name, base);
-
-        if options.remove_kernel:
-            if not options.kernel_name:
-                raise exceptions.IOError("This action requires the -n 
option.");
-            else:
-                name = options.kernel_name;
-            my_modtool.remove_kernel(name);
-
-        if options.add_all_kernels:
-
-            if options.base_path:
-                base = options.base_path;
-            else:
-                base = my_cfg.cfg.get(my_cfg.config_name, 'base');
-            kernelset = my_modtool.get_current_kernels(base);
-            for i in kernelset:
-                my_modtool.import_kernel(i, base);
-
-        if options.remote_list:
-            if not options.base_path:
-                raise exceptions.IOError("This action requires the -b option.  
Try -l or -k for listing kernels in the base or the module.")
-            else:
-                base = options.base_path;
-            kernelset = my_modtool.get_current_kernels(base);
-            for i in kernelset:
-                print i;
-
-        if options.list:
-            kernelset = my_modtool.get_current_kernels();
-            for i in kernelset:
-                print i;
-
-        if options.kernels:
-            dest = my_cfg.cfg.get(my_cfg.config_name, 'destination');
-            name = my_cfg.cfg.get(my_cfg.config_name, 'name');
-            base = os.path.join(dest, 'volk_' + name);
-            kernelset = my_modtool.get_current_kernels(base);
-            for i in kernelset:
-                print i;
diff --git a/volk/python/volk_modtool/volk_modtool_generate.py 
b/volk/python/volk_modtool/volk_modtool_generate.py
deleted file mode 100644
index 746dc01..0000000
--- a/volk/python/volk_modtool/volk_modtool_generate.py
+++ /dev/null
@@ -1,330 +0,0 @@
-#
-# Copyright 2013 Free Software Foundation, Inc.
-#
-# This file is part of GNU Radio
-#
-# GNU Radio is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GNU Radio is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GNU Radio; see the file COPYING.  If not, write to
-# the Free Software Foundation, Inc., 51 Franklin Street,
-# Boston, MA 02110-1301, USA.
-#
-
-import os
-import glob
-import sys
-import re
-import glob
-import shutil
-import exceptions
-from sets import Set
-
-class volk_modtool:
-    def __init__(self, cfg):
-        self.volk = re.compile('volk');
-        self.remove_after_underscore = re.compile("_.*");
-        self.volk_run_tests = re.compile('^\s*VOLK_RUN_TESTS.*\n', 
re.MULTILINE);
-        self.volk_profile = 
re.compile('^\s*(VOLK_PROFILE|VOLK_PUPPET_PROFILE).*\n', re.MULTILINE);
-        self.my_dict = cfg;
-        self.lastline = re.compile('\s*char path\[1024\];.*');
-        self.badassert = re.compile('^\s*assert\(toked\[0\] == "volk_.*\n', 
re.MULTILINE);
-        self.goodassert = '    assert(toked[0] == "volk");\n'
-        self.baderase = re.compile('^\s*toked.erase\(toked.begin\(\)\);.*\n', 
re.MULTILINE);
-        self.gooderase = '    toked.erase(toked.begin());\n    
toked.erase(toked.begin());\n';
-
-    def get_basename(self, base=None):
-        if not base:
-            base = self.my_dict['base']
-        candidate = base.split('/')[-1];
-        if len(candidate.split('_')) == 1:
-            return '';
-        else:
-            return candidate.split('_')[-1];
-
-    def get_current_kernels(self, base=None):
-        if not base:
-            base = self.my_dict['base']
-            name = self.get_basename();
-        else:
-            name = self.get_basename(base);
-        if name == '':
-            hdr_files = glob.glob(os.path.join(base, "kernels/volk/*.h"));
-            begins = re.compile("(?<=volk_).*")
-        else:
-            hdr_files = glob.glob(os.path.join(base, "kernels/volk_" + name + 
"/*.h"));
-            begins = re.compile("(?<=volk_" + name + "_).*")
-
-        datatypes = [];
-        functions = [];
-
-
-        for line in hdr_files:
-
-            subline = re.search(".*\.h.*", os.path.basename(line))
-            if subline:
-                subsubline = begins.search(subline.group(0));
-                if subsubline:
-                    dtype = self.remove_after_underscore.sub("", 
subsubline.group(0));
-                    subdtype = re.search("[0-9]+[A-z]+", dtype);
-                    if subdtype:
-                        datatypes.append(subdtype.group(0));
-
-
-        datatypes = set(datatypes);
-
-        for line in hdr_files:
-            for dt in datatypes:
-                if dt in line:
-                    #subline = re.search("(?<=volk_)" + dt + ".*(?=\.h)", 
line);
-                    subline = re.search(begins.pattern[:-2] + dt + 
".*(?=\.h)", line);
-                    if subline:
-                        functions.append(subline.group(0));
-
-        return set(functions);
-
-    def make_module_skeleton(self):
-
-        dest = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'])
-        if os.path.exists(dest):
-            raise exceptions.IOError("Destination %s already exits!"%(dest));
-
-        if not os.path.exists(os.path.join(self.my_dict['destination'], 
'volk_' + self.my_dict['name'], 'kernels/volk_' + self.my_dict['name'])):
-            os.makedirs(os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'kernels/volk_' + self.my_dict['name']))
-
-        current_kernel_names = self.get_current_kernels();
-
-        for root, dirnames, filenames in os.walk(self.my_dict['base']):
-            for name in filenames:
-                t_table = map(lambda a: re.search(a, name), 
current_kernel_names);
-                t_table = set(t_table);
-                if t_table == set([None]):
-                    infile = os.path.join(root, name);
-                    instring = open(infile, 'r').read();
-                    outstring = re.sub(self.volk, 'volk_' + 
self.my_dict['name'], instring);
-                    newname = re.sub(self.volk, 'volk_' + 
self.my_dict['name'], name);
-                    relpath = os.path.relpath(infile, self.my_dict['base']);
-                    newrelpath = re.sub(self.volk, 'volk_' + 
self.my_dict['name'], relpath);
-                    dest = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], os.path.dirname(newrelpath), newname);
-
-                    if not os.path.exists(os.path.dirname(dest)):
-                        os.makedirs(os.path.dirname(dest))
-                    open(dest, 'w+').write(outstring);
-
-
-        infile = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'lib/testqa.cc');
-        instring = open(infile, 'r').read();
-        outstring = re.sub(self.volk_run_tests, '', instring);
-        open(infile, 'w+').write(outstring);
-
-        infile = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'apps/volk_' + self.my_dict['name'] + '_profile.cc');
-        instring = open(infile, 'r').read();
-        outstring = re.sub(self.volk_profile, '', instring);
-        open(infile, 'w+').write(outstring);
-
-        infile = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'lib/qa_utils.cc');
-        instring = open(infile, 'r').read();
-        outstring = re.sub(self.badassert, self.goodassert, instring);
-        outstring = re.sub(self.baderase, self.gooderase, outstring);
-        open(infile, 'w+').write(outstring);
-
-    def write_default_cfg(self, cfg):
-        outfile = open(os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'volk_modtool.cfg'), 'wb');
-        cfg.write(outfile);
-        outfile.close();
-
-
-    def convert_kernel(self, oldvolk, name, base, inpath, top):
-        infile = os.path.join(inpath, 'kernels/' + top[:-1] + '/' + top + name 
+ '.h');
-        instring = open(infile, 'r').read();
-        outstring = re.sub(oldvolk, 'volk_' + self.my_dict['name'], instring);
-        newname = 'volk_' + self.my_dict['name'] + '_' + name + '.h';
-        relpath = os.path.relpath(infile, base);
-        newrelpath = re.sub(oldvolk, 'volk_' + self.my_dict['name'], relpath);
-        dest = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], os.path.dirname(newrelpath), newname);
-
-        if not os.path.exists(os.path.dirname(dest)):
-            os.makedirs(os.path.dirname(dest))
-        open(dest, 'w+').write(outstring);
-
-        # copy orc proto-kernels if they exist
-        for orcfile in glob.glob(inpath + '/orc/' + top + name + '*.orc'):
-            if os.path.isfile(orcfile):
-                instring = open(orcfile, 'r').read();
-                outstring = re.sub(oldvolk, 'volk_' + self.my_dict['name'], 
instring);
-                newname = 'volk_' + self.my_dict['name'] + '_' + name + '.orc';
-                relpath = os.path.relpath(orcfile, base);
-                newrelpath = re.sub(oldvolk, 'volk_' + self.my_dict['name'], 
relpath);
-                dest = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], os.path.dirname(newrelpath), newname);
-                if not os.path.exists(os.path.dirname(dest)):
-                    os.makedirs(os.path.dirname(dest));
-                open(dest, 'w+').write(outstring)
-
-
-    def remove_kernel(self, name):
-        basename = self.my_dict['name'];
-        if len(basename) > 0:
-            top = 'volk_' + basename + '_';
-        else:
-            top = 'volk_'
-        base = os.path.join(self.my_dict['destination'], top[:-1]) ;
-
-        if not name in self.get_current_kernels():
-
-            raise exceptions.IOError("Requested kernel %s is not in module 
%s"%(name,base));
-
-
-
-        inpath = os.path.abspath(base);
-
-
-        kernel = re.compile(name)
-        search_kernels = Set([kernel])
-        profile = re.compile('^\s*VOLK_PROFILE')
-        puppet = re.compile('^\s*VOLK_PUPPET')
-        src_dest = os.path.join(inpath, 'apps/', top[:-1] + '_profile.cc');
-        infile = open(src_dest);
-        otherlines = infile.readlines();
-        open(src_dest, 'w+').write('');
-
-        for otherline in otherlines:
-            write_okay = True;
-            if kernel.search(otherline):
-                write_okay = False;
-                if puppet.match(otherline):
-                    args = re.search("(?<=VOLK_PUPPET_PROFILE).*", otherline)
-                    m_func = args.group(0).split(',')[0];
-                    func = re.search('(?<=' + top + ').*', m_func);
-                    search_kernels.add(re.compile(func.group(0)));
-            if write_okay:
-                open(src_dest, 'a').write(otherline);
-
-
-        src_dest = os.path.join(inpath, 'lib/testqa.cc')
-        infile = open(src_dest);
-        otherlines = infile.readlines();
-        open(src_dest, 'w+').write('');
-
-        for otherline in otherlines:
-            write_okay = True;
-
-            for kernel in search_kernels:
-                if kernel.search(otherline):
-                    write_okay = False;
-
-            if write_okay:
-                open(src_dest, 'a').write(otherline);
-
-        for kernel in search_kernels:
-            infile = os.path.join(inpath, 'kernels/' + top[:-1] + '/' + top + 
kernel.pattern + '.h');
-            print "Removing kernel %s"%(kernel.pattern)
-            if os.path.exists(infile):
-                os.remove(infile);
-        # remove the orc proto-kernels if they exist. There are no puppets here
-        # so just need to glob for files matching kernel name
-        print glob.glob(inpath + '/orc/' + top + name + '*.orc');
-        for orcfile in glob.glob(inpath + '/orc/' + top + name + '*.orc'):
-            print orcfile
-            if(os.path.exists(orcfile)):
-                os.remove(orcfile);
-
-    def import_kernel(self, name, base):
-        if not (base):
-            base = self.my_dict['base'];
-            basename = self.getbasename();
-        else:
-            basename = self.get_basename(base);
-        if not name in self.get_current_kernels(base):
-            raise exceptions.IOError("Requested kernel %s is not in module 
%s"%(name,base));
-
-        inpath = os.path.abspath(base);
-        if len(basename) > 0:
-            top = 'volk_' + basename + '_';
-        else:
-            top = 'volk_'
-        oldvolk = re.compile(top[:-1]);
-
-        self.convert_kernel(oldvolk, name, base, inpath, top);
-
-        kernel = re.compile(name)
-        search_kernels = Set([kernel])
-
-        profile = re.compile('^\s*VOLK_PROFILE')
-        puppet = re.compile('^\s*VOLK_PUPPET')
-        infile = open(os.path.join(inpath, 'apps/', oldvolk.pattern + 
'_profile.cc'));
-        otherinfile = open(os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'apps/volk_' + self.my_dict['name'] + '_profile.cc'));
-        dest = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'apps/volk_' + self.my_dict['name'] + '_profile.cc');
-        lines = infile.readlines();
-        otherlines = otherinfile.readlines();
-        open(dest, 'w+').write('');
-        insert = False;
-        inserted = False
-        for otherline in otherlines:
-
-            if self.lastline.match(otherline):
-                insert = True;
-            if insert and not inserted:
-                inserted = True;
-                for line in lines:
-                    if kernel.search(line):
-                        if profile.match(line):
-                            outline = re.sub(oldvolk, 'volk_' + 
self.my_dict['name'], line);
-                            open(dest, 'a').write(outline);
-                        elif puppet.match(line):
-                            outline = re.sub(oldvolk, 'volk_' + 
self.my_dict['name'], line);
-                            open(dest, 'a').write(outline);
-                            args = re.search("(?<=VOLK_PUPPET_PROFILE).*", 
line)
-                            m_func = args.group(0).split(',')[0];
-                            func = re.search('(?<=' + top + ').*', m_func);
-                            search_kernels.add(re.compile(func.group(0)));
-                            self.convert_kernel(oldvolk, func.group(0), base, 
inpath, top);
-            write_okay = True;
-            for kernel in search_kernels:
-                if kernel.search(otherline):
-                    write_okay = False
-            if write_okay:
-                open(dest, 'a').write(otherline);
-
-        for kernel in search_kernels:
-            print "Adding kernel %s from module %s"%(kernel.pattern,base)
-
-        infile = open(os.path.join(inpath, 'lib/testqa.cc'));
-        otherinfile = open(os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'lib/testqa.cc'));
-        dest = os.path.join(self.my_dict['destination'], 'volk_' + 
self.my_dict['name'], 'lib/testqa.cc');
-        lines = infile.readlines();
-        otherlines = otherinfile.readlines();
-        open(dest, 'w+').write('');
-        inserted = False;
-        insert = False
-        for otherline in otherlines:
-
-            if (re.match('\s*', otherline) == None or re.match('\s*#.*', 
otherline) == None):
-
-                insert = True;
-            if insert and not inserted:
-                inserted = True;
-                for line in lines:
-                    for kernel in search_kernels:
-                        if kernel.search(line):
-                            if self.volk_run_tests.match(line):
-                                outline = re.sub(oldvolk, 'volk_' + 
self.my_dict['name'], line);
-                                open(dest, 'a').write(outline);
-            write_okay = True;
-            for kernel in search_kernels:
-                if kernel.search(otherline):
-                    write_okay = False
-            if write_okay:
-                open(dest, 'a').write(otherline);
-
-
-
-
-
diff --git a/volk/tmpl/volk.tmpl.c b/volk/tmpl/volk.tmpl.c
deleted file mode 100644
index 2f54bbc..0000000
--- a/volk/tmpl/volk.tmpl.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <volk/volk_common.h>
-#include "volk_machines.h"
-#include <volk/volk_typedefs.h>
-#include <volk/volk_cpu.h>
-#include "volk_rank_archs.h"
-#include <volk/volk.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-static size_t __alignment = 0;
-static intptr_t __alignment_mask = 0;
-
-struct volk_machine *get_machine(void)
-{
-  extern struct volk_machine *volk_machines[];
-  extern unsigned int n_volk_machines;
-  static struct volk_machine *machine = NULL;
-
-  if(machine != NULL)
-    return machine;
-  else {
-    unsigned int max_score = 0;
-    unsigned int i;
-    struct volk_machine *max_machine = NULL;
-    for(i=0; i<n_volk_machines; i++) {
-      if(!(volk_machines[i]->caps & (~volk_get_lvarch()))) {
-        if(volk_machines[i]->caps > max_score) {
-          max_score = volk_machines[i]->caps;
-          max_machine = volk_machines[i];
-        }
-      }
-    }
-    machine = max_machine;
-    printf("Using Volk machine: %s\n", machine->name);
-    __alignment = machine->alignment;
-    __alignment_mask = (intptr_t)(__alignment-1);
-    return machine;
-  }
-}
-
-void volk_list_machines(void)
-{
-  extern struct volk_machine *volk_machines[];
-  extern unsigned int n_volk_machines;
-
-  unsigned int i;
-  for(i=0; i<n_volk_machines; i++) {
-    if(!(volk_machines[i]->caps & (~volk_get_lvarch()))) {
-        printf("%s;", volk_machines[i]->name);
-    }
-  }
-  printf("\n");
-}
-
-const char* volk_get_machine(void)
-{
-  extern struct volk_machine *volk_machines[];
-  extern unsigned int n_volk_machines;
-  static struct volk_machine *machine = NULL;
-
-  if(machine != NULL)
-    return machine->name;
-  else {
-    unsigned int max_score = 0;
-    unsigned int i;
-    struct volk_machine *max_machine = NULL;
-    for(i=0; i<n_volk_machines; i++) {
-      if(!(volk_machines[i]->caps & (~volk_get_lvarch()))) {
-        if(volk_machines[i]->caps > max_score) {
-          max_score = volk_machines[i]->caps;
-          max_machine = volk_machines[i];
-        }
-      }
-    }
-    machine = max_machine;
-    return machine->name;
-  }
-}
-
-size_t volk_get_alignment(void)
-{
-    get_machine(); //ensures alignment is set
-    return __alignment;
-}
-
-bool volk_is_aligned(const void *ptr)
-{
-    return ((intptr_t)(ptr) & __alignment_mask) == 0;
-}
-
-#define LV_HAVE_GENERIC
-#define LV_HAVE_DISPATCHER
-
-#for $kern in $kernels
-
-#if $kern.has_dispatcher
-#include <volk/$(kern.name).h> //pulls in the dispatcher
-#end if
-
-static inline void __$(kern.name)_d($kern.arglist_full)
-{
-    #if $kern.has_dispatcher
-    $(kern.name)_dispatcher($kern.arglist_names);
-    return;
-    #end if
-
-    if (volk_is_aligned(
-    #set $num_open_parens = 0
-    #for $arg_type, $arg_name in $kern.args
-        #if '*' in $arg_type
-        VOLK_OR_PTR($arg_name,
-        #set $num_open_parens += 1
-        #end if
-    #end for
-        0$(')'*$num_open_parens)
-    )){
-        $(kern.name)_a($kern.arglist_names);
-    }
-    else{
-        $(kern.name)_u($kern.arglist_names);
-    }
-}
-
-static inline void __init_$(kern.name)(void)
-{
-    const char *name = get_machine()->$(kern.name)_name;
-    const char **impl_names = get_machine()->$(kern.name)_impl_names;
-    const int *impl_deps = get_machine()->$(kern.name)_impl_deps;
-    const bool *alignment = get_machine()->$(kern.name)_impl_alignment;
-    const size_t n_impls = get_machine()->$(kern.name)_n_impls;
-    const size_t index_a = volk_rank_archs(name, impl_names, impl_deps, 
alignment, n_impls, true/*aligned*/);
-    const size_t index_u = volk_rank_archs(name, impl_names, impl_deps, 
alignment, n_impls, false/*unaligned*/);
-    $(kern.name)_a = get_machine()->$(kern.name)_impls[index_a];
-    $(kern.name)_u = get_machine()->$(kern.name)_impls[index_u];
-
-    assert($(kern.name)_a);
-    assert($(kern.name)_u);
-
-    $(kern.name) = &__$(kern.name)_d;
-}
-
-static inline void __$(kern.name)_a($kern.arglist_full)
-{
-    __init_$(kern.name)();
-    $(kern.name)_a($kern.arglist_names);
-}
-
-static inline void __$(kern.name)_u($kern.arglist_full)
-{
-    __init_$(kern.name)();
-    $(kern.name)_u($kern.arglist_names);
-}
-
-static inline void __$(kern.name)($kern.arglist_full)
-{
-    __init_$(kern.name)();
-    $(kern.name)($kern.arglist_names);
-}
-
-$kern.pname $(kern.name)_a = &__$(kern.name)_a;
-$kern.pname $(kern.name)_u = &__$(kern.name)_u;
-$kern.pname $(kern.name)   = &__$(kern.name);
-
-void $(kern.name)_manual($kern.arglist_full, const char* impl_name)
-{
-    const int index = volk_get_index(
-        get_machine()->$(kern.name)_impl_names,
-        get_machine()->$(kern.name)_n_impls,
-        impl_name
-    );
-    get_machine()->$(kern.name)_impls[index](
-        $kern.arglist_names
-    );
-}
-
-volk_func_desc_t $(kern.name)_get_func_desc(void) {
-    const char **impl_names = get_machine()->$(kern.name)_impl_names;
-    const int *impl_deps = get_machine()->$(kern.name)_impl_deps;
-    const bool *alignment = get_machine()->$(kern.name)_impl_alignment;
-    const size_t n_impls = get_machine()->$(kern.name)_n_impls;
-    volk_func_desc_t desc = {
-        impl_names,
-        impl_deps,
-        alignment,
-        n_impls
-    };
-    return desc;
-}
-
-#end for
diff --git a/volk/tmpl/volk.tmpl.h b/volk/tmpl/volk.tmpl.h
deleted file mode 100644
index 1d0957e..0000000
--- a/volk/tmpl/volk.tmpl.h
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_RUNTIME
-#define INCLUDED_VOLK_RUNTIME
-
-#include <volk/volk_typedefs.h>
-#include <volk/volk_config_fixed.h>
-#include <volk/volk_common.h>
-#include <volk/volk_complex.h>
-#include <volk/volk_malloc.h>
-
-#include <stdlib.h>
-#include <stdbool.h>
-
-__VOLK_DECL_BEGIN
-
-typedef struct volk_func_desc
-{
-    const char **impl_names;
-    const int *impl_deps;
-    const bool *impl_alignment;
-    const size_t n_impls;
-} volk_func_desc_t;
-
-//! Prints a list of machines available
-VOLK_API void volk_list_machines(void);
-
-//! Returns the name of the machine this instance will use
-VOLK_API const char* volk_get_machine(void);
-
-//! Get the machine alignment in bytes
-VOLK_API size_t volk_get_alignment(void);
-
-/*!
- * The VOLK_OR_PTR macro is a convenience macro
- * for checking the alignment of a set of pointers.
- * Example usage:
- * volk_is_aligned(VOLK_OR_PTR((VOLK_OR_PTR(p0, p1), p2)))
- */
-#define VOLK_OR_PTR(ptr0, ptr1) \
-    (const void *)(((intptr_t)(ptr0)) | ((intptr_t)(ptr1)))
-
-/*!
- * Is the pointer on a machine alignment boundary?
- *
- * Note: for performance reasons, this function
- * is not usable until another volk API call is made
- * which will perform certain initialization tasks.
- *
- * \param ptr the pointer to some memory buffer
- * \return 1 for alignment boundary, else 0
- */
-VOLK_API bool volk_is_aligned(const void *ptr);
-
-#for $kern in $kernels
-
-//! A function pointer to the dispatcher implementation
-extern VOLK_API $kern.pname $kern.name;
-
-//! A function pointer to the fastest aligned implementation
-extern VOLK_API $kern.pname $(kern.name)_a;
-
-//! A function pointer to the fastest unaligned implementation
-extern VOLK_API $kern.pname $(kern.name)_u;
-
-//! Call into a specific implementation given by name
-extern VOLK_API void $(kern.name)_manual($kern.arglist_full, const char* 
impl_name);
-
-//! Get description paramaters for this kernel
-extern VOLK_API volk_func_desc_t $(kern.name)_get_func_desc(void);
-#end for
-
-__VOLK_DECL_END
-
-#endif /*INCLUDED_VOLK_RUNTIME*/
diff --git a/volk/tmpl/volk_config_fixed.tmpl.h 
b/volk/tmpl/volk_config_fixed.tmpl.h
deleted file mode 100644
index e1c01ae..0000000
--- a/volk/tmpl/volk_config_fixed.tmpl.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_CONFIG_FIXED_H
-#define INCLUDED_VOLK_CONFIG_FIXED_H
-
-#for $i, $arch in enumerate($archs)
-#define LV_$(arch.name.upper()) $i
-#end for
-
-#endif /*INCLUDED_VOLK_CONFIG_FIXED*/
diff --git a/volk/tmpl/volk_cpu.tmpl.c b/volk/tmpl/volk_cpu.tmpl.c
deleted file mode 100644
index f6df4d1..0000000
--- a/volk/tmpl/volk_cpu.tmpl.c
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <volk/volk_cpu.h>
-#include <volk/volk_config_fixed.h>
-#include <stdlib.h>
-
-struct VOLK_CPU volk_cpu;
-
-#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || 
defined(_M_X64)
-    #define VOLK_CPU_x86
-#endif
-
-#if defined(VOLK_CPU_x86)
-
-//implement get cpuid for gcc compilers using a system or local copy of cpuid.h
-#if defined(__GNUC__)
-    #if defined(HAVE_CPUID_H)
-        #include <cpuid.h>
-    #else
-        #include "gcc_x86_cpuid.h"
-    #endif
-    #define cpuid_x86(op, r) __get_cpuid(op, (unsigned int *)r+0, (unsigned 
int *)r+1, (unsigned int *)r+2, (unsigned int *)r+3)
-
-    /* Return Intel AVX extended CPU capabilities register.
-     * This function will bomb on non-AVX-capable machines, so
-     * check for AVX capability before executing.
-     */
-    #if ((__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 2) || 
(__clang_major__ >= 3)) && defined(HAVE_XGETBV)
-    static inline unsigned long long _xgetbv(unsigned int index){
-        unsigned int eax, edx;
-        __asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(index));
-        return ((unsigned long long)edx << 32) | eax;
-    }
-    #define __xgetbv() _xgetbv(0)
-    #else
-    #define __xgetbv() 0
-    #endif
-
-//implement get cpuid for MSVC compilers using __cpuid intrinsic
-#elif defined(_MSC_VER) && defined(HAVE_INTRIN_H)
-    #include <intrin.h>
-    #define cpuid_x86(op, r) __cpuid(((int*)r), op)
-
-    #if defined(_XCR_XFEATURE_ENABLED_MASK)
-    #define __xgetbv() _xgetbv(_XCR_XFEATURE_ENABLED_MASK)
-    #else
-    #define __xgetbv() 0
-    #endif
-
-#else
-    #error "A get cpuid for volk is not available on this compiler..."
-#endif //defined(__GNUC__)
-
-#endif //defined(VOLK_CPU_x86)
-
-static inline unsigned int cpuid_x86_bit(unsigned int reg, unsigned int op, 
unsigned int bit) {
-#if defined(VOLK_CPU_x86)
-    unsigned int regs[4];
-    cpuid_x86(op, regs);
-    return regs[reg] >> bit & 0x01;
-#else
-    return 0;
-#endif
-}
-
-static inline unsigned int check_extended_cpuid(unsigned int val) {
-#if defined(VOLK_CPU_x86)
-    unsigned int regs[4];
-    cpuid_x86(0x80000000, regs);
-    return regs[0] >= val;
-#else
-    return 0;
-#endif
-}
-
-static inline unsigned int get_avx_enabled(void) {
-#if defined(VOLK_CPU_x86)
-    return __xgetbv() & 0x6;
-#else
-    return 0;
-#endif
-}
-
-//neon detection is linux specific
-#if defined(__arm__) && defined(__linux__)
-    #include <asm/hwcap.h>
-    #include <linux/auxvec.h>
-    #include <stdio.h>
-    #define VOLK_CPU_ARM
-#endif
-
-static int has_neon(void){
-#if defined(VOLK_CPU_ARM)
-    FILE *auxvec_f;
-    unsigned long auxvec[2];
-    unsigned int found_neon = 0;
-    auxvec_f = fopen("/proc/self/auxv", "rb");
-    if(!auxvec_f) return 0;
-
-    size_t r = 1;
-    //so auxv is basically 32b of ID and 32b of value
-    //so it goes like this
-    while(!found_neon && r) {
-      r = fread(auxvec, sizeof(unsigned long), 2, auxvec_f);
-      if((auxvec[0] == AT_HWCAP) && (auxvec[1] & HWCAP_NEON))
-        found_neon = 1;
-    }
-
-    fclose(auxvec_f);
-    return found_neon;
-#else
-    return 0;
-#endif
-}
-
-static int has_ppc(void){
-#ifdef __PPC__
-    return 1;
-#else
-    return 0;
-#endif
-}
-
-#for $arch in $archs
-static int i_can_has_$arch.name (void) {
-    #for $check, $params in $arch.checks
-    if ($(check)($(', '.join($params))) == 0) return 0;
-    #end for
-    return 1;
-}
-
-#end for
-
-#if defined(HAVE_FENV_H)
-    #if defined(FE_TONEAREST)
-        #include <fenv.h>
-        static inline void set_float_rounding(void){
-            fesetround(FE_TONEAREST);
-        }
-    #else
-        static inline void set_float_rounding(void){
-            //do nothing
-        }
-    #endif
-#elif defined(_MSC_VER)
-    #include <float.h>
-    static inline void set_float_rounding(void){
-        unsigned int cwrd;
-        _controlfp_s(&cwrd, 0, 0);
-        _controlfp_s(&cwrd, _RC_NEAR, _MCW_RC);
-    }
-#else
-    static inline void set_float_rounding(void){
-        //do nothing
-    }
-#endif
-
-void volk_cpu_init() {
-    #for $arch in $archs
-    volk_cpu.has_$arch.name = &i_can_has_$arch.name;
-    #end for
-    set_float_rounding();
-}
-
-unsigned int volk_get_lvarch() {
-    unsigned int retval = 0;
-    volk_cpu_init();
-    #for $arch in $archs
-    retval += volk_cpu.has_$(arch.name)() << LV_$(arch.name.upper());
-    #end for
-    return retval;
-}
diff --git a/volk/tmpl/volk_cpu.tmpl.h b/volk/tmpl/volk_cpu.tmpl.h
deleted file mode 100644
index 4d66512..0000000
--- a/volk/tmpl/volk_cpu.tmpl.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_CPU_H
-#define INCLUDED_VOLK_CPU_H
-
-#include <volk/volk_common.h>
-
-__VOLK_DECL_BEGIN
-
-struct VOLK_CPU {
-    #for $arch in $archs
-    int (*has_$arch.name) ();
-    #end for
-};
-
-extern struct VOLK_CPU volk_cpu;
-
-void volk_cpu_init ();
-unsigned int volk_get_lvarch ();
-
-__VOLK_DECL_END
-
-#endif /*INCLUDED_VOLK_CPU_H*/
diff --git a/volk/tmpl/volk_machine_xxx.tmpl.c 
b/volk/tmpl/volk_machine_xxx.tmpl.c
deleted file mode 100644
index 68d7f3e..0000000
--- a/volk/tmpl/volk_machine_xxx.tmpl.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#set $this_machine = $machine_dict[$args[0]]
-#set $arch_names = $this_machine.arch_names
-
-#for $arch in $this_machine.archs
-#define LV_HAVE_$(arch.name.upper()) 1
-#end for
-
-#include <volk/volk_common.h>
-#include "volk_machines.h"
-#include <volk/volk_config_fixed.h>
-
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
-#for $kern in $kernels
-#include <volk/$(kern.name).h>
-#end for
-
-########################################################################
-#def make_arch_have_list($archs)
-$(' | '.join(['(1 << LV_%s)'%a.name.upper() for a in $archs]))#slurp
-#end def
-
-########################################################################
-#def make_impl_name_list($impls)
-{$(', '.join(['"%s"'%i.name for i in $impls]))}#slurp
-#end def
-
-########################################################################
-#def make_impl_align_list($impls)
-{$(', '.join(['true' if i.is_aligned else 'false' for i in $impls]))}#slurp
-#end def
-
-########################################################################
-#def make_impl_deps_list($impls)
-{$(', '.join([' | '.join(['(1 << LV_%s)'%d.upper() for d in i.deps]) for i in 
$impls]))}#slurp
-#end def
-
-########################################################################
-#def make_impl_fcn_list($name, $impls)
-{$(', '.join(['%s_%s'%($name, i.name) for i in $impls]))}#slurp
-#end def
-
-struct volk_machine volk_machine_$(this_machine.name) = {
-    $make_arch_have_list($this_machine.archs),
-    "$this_machine.name",
-    $this_machine.alignment,
-    #for $kern in $kernels
-        #set $impls = $kern.get_impls($arch_names)
-    "$kern.name",                                   ##//kernel name
-    $make_impl_name_list($impls),                   ##//list of kernel 
implementations by name
-    $make_impl_deps_list($impls),                   ##//list of arch 
dependencies per implementation
-    $make_impl_align_list($impls),                  ##//alignment required? 
for each implementation
-    $make_impl_fcn_list($kern.name, $impls),        ##//pointer to each 
implementation
-    $(len($impls)),                                 ##//number of 
implementations listed here
-    #end for
-};
diff --git a/volk/tmpl/volk_machines.tmpl.c b/volk/tmpl/volk_machines.tmpl.c
deleted file mode 100644
index 57dd03c..0000000
--- a/volk/tmpl/volk_machines.tmpl.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#include <volk/volk_common.h>
-#include <volk/volk_typedefs.h>
-#include "volk_machines.h"
-
-struct volk_machine *volk_machines[] = {
-#for $machine in $machines
-#ifdef LV_MACHINE_$(machine.name.upper())
-&volk_machine_$(machine.name),
-#endif
-#end for
-};
-
-unsigned int n_volk_machines = sizeof(volk_machines)/sizeof(*volk_machines);
diff --git a/volk/tmpl/volk_machines.tmpl.h b/volk/tmpl/volk_machines.tmpl.h
deleted file mode 100644
index 7e11b10..0000000
--- a/volk/tmpl/volk_machines.tmpl.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_LIBVOLK_MACHINES_H
-#define INCLUDED_LIBVOLK_MACHINES_H
-
-#include <volk/volk_common.h>
-#include <volk/volk_typedefs.h>
-
-#include <stdbool.h>
-#include <stdlib.h>
-
-__VOLK_DECL_BEGIN
-
-struct volk_machine {
-    const unsigned int caps; //capabilities (i.e., archs compiled into this 
machine, in the volk_get_lvarch format)
-    const char *name;
-    const size_t alignment; //the maximum byte alignment required for 
functions in this library
-    #for $kern in $kernels
-    const char *$(kern.name)_name;
-    const char *$(kern.name)_impl_names[$(len($archs))];
-    const int $(kern.name)_impl_deps[$(len($archs))];
-    const bool $(kern.name)_impl_alignment[$(len($archs))];
-    const $(kern.pname) $(kern.name)_impls[$(len($archs))];
-    const size_t $(kern.name)_n_impls;
-    #end for
-};
-
-#for $machine in $machines
-#ifdef LV_MACHINE_$(machine.name.upper())
-extern struct volk_machine volk_machine_$(machine.name);
-#endif
-#end for
-
-__VOLK_DECL_END
-
-#endif //INCLUDED_LIBVOLK_MACHINES_H
diff --git a/volk/tmpl/volk_typedefs.tmpl.h b/volk/tmpl/volk_typedefs.tmpl.h
deleted file mode 100644
index 6f54269..0000000
--- a/volk/tmpl/volk_typedefs.tmpl.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright 2011-2012 Free Software Foundation, Inc.
- *
- * This file is part of GNU Radio
- *
- * GNU Radio is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * GNU Radio is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Radio; see the file COPYING.  If not, write to
- * the Free Software Foundation, Inc., 51 Franklin Street,
- * Boston, MA 02110-1301, USA.
- */
-
-#ifndef INCLUDED_VOLK_TYPEDEFS
-#define INCLUDED_VOLK_TYPEDEFS
-
-#include <inttypes.h>
-#include <volk/volk_complex.h>
-
-#for $kern in $kernels
-typedef void (*$(kern.pname))($kern.arglist_types);
-#end for
-
-#endif /*INCLUDED_VOLK_TYPEDEFS*/
diff --git a/volk/volk.pc.in b/volk/volk.pc.in
deleted file mode 100644
index 8bb818c..0000000
--- a/volk/volk.pc.in
+++ /dev/null
@@ -1,14 +0,0 @@
address@hidden@
address@hidden@
address@hidden@
address@hidden@
address@hidden@
-
-
-Name: volk
-Description: VOLK: Vector Optimized Library of Kernels
-Requires:
-Version: @VERSION@
-Libs: -L${libdir} -lvolk
-Cflags: -I${includedir} ${LV_CXXFLAGS}
-



reply via email to

[Prev in Thread] Current Thread [Next in Thread]