From 4be612868985689ae05a375f895103f20a532967 Mon Sep 17 00:00:00 2001
From: Clayton Smith <argilo@gmail.com>
Date: Sun, 17 Dec 2023 10:28:27 -0500
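Subject: [PATCH] Remove references to simdmath library

The x86 kernels deleted here only vectorized when LV_HAVE_LIB_SIMDMATH
was defined; without it each one fell straight through to its scalar
loop. Remove those kernels, together with the
power_spectral_densitypuppet wrappers that called the deleted AVX and
SSE3 power_spectral_density kernels.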

Signed-off-by: Clayton Smith <argilo@gmail.com>
---
 kernels/volk/volk_32f_s32f_power_32f.h        | 117 ------------
 kernels/volk/volk_32fc_s32f_power_32fc.h      |  81 --------
 ...fc_s32f_power_spectral_densitypuppet_32f.h |  30 ---
 .../volk/volk_32fc_s32f_power_spectrum_32f.h  |  80 --------
 ..._32fc_s32f_x2_power_spectral_density_32f.h | 179 ------------------
 5 files changed, 487 deletions(-)

diff --git a/kernels/volk/volk_32f_s32f_power_32f.h b/kernels/volk/volk_32f_s32f_power_32f.h
index 0e8536537..51b7b698b 100644
--- a/kernels/volk/volk_32f_s32f_power_32f.h
+++ b/kernels/volk/volk_32f_s32f_power_32f.h
@@ -62,123 +62,6 @@
 #include <math.h>
 #include <stdio.h>
 
-#ifdef LV_HAVE_SSE4_1
-#include <tmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector,
-                                                    const float* aVector,
-                                                    const float power,
-                                                    unsigned int num_points)
-{
-    unsigned int number = 0;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-    const unsigned int quarterPoints = num_points / 4;
-    __m128 vPower = _mm_set_ps1(power);
-    __m128 zeroValue = _mm_setzero_ps();
-    __m128 signMask;
-    __m128 negatedValues;
-    __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
-    __m128 onesMask = _mm_set_ps1(1);
-
-    __m128 aVal, cVal;
-    for (; number < quarterPoints; number++) {
-
-        aVal = _mm_load_ps(aPtr);
-        signMask = _mm_cmplt_ps(aVal, zeroValue);
-        negatedValues = _mm_sub_ps(zeroValue, aVal);
-        aVal = _mm_blendv_ps(aVal, negatedValues, signMask);
-
-        // powf4 doesn't support negative values in the base, so we mask them off and then
-        // apply the negative after
-        cVal = powf4(aVal, vPower); // Takes each input value to the specified power
-
-        cVal = _mm_mul_ps(_mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal);
-
-        _mm_store_ps(cPtr, cVal); // Store the results back into the C container
-
-        aPtr += 4;
-        cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-    for (; number < num_points; number++) {
-        *cPtr++ = powf((*aPtr++), power);
-    }
-}
-
-#endif /* LV_HAVE_SSE4_1 */
-
-
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-static inline void volk_32f_s32f_power_32f_a_sse(float* cVector,
-                                                 const float* aVector,
-                                                 const float power,
-                                                 unsigned int num_points)
-{
-    unsigned int number = 0;
-
-    float* cPtr = cVector;
-    const float* aPtr = aVector;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-    const unsigned int quarterPoints = num_points / 4;
-    __m128 vPower = _mm_set_ps1(power);
-    __m128 zeroValue = _mm_setzero_ps();
-    __m128 signMask;
-    __m128 negatedValues;
-    __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power));
-    __m128 onesMask = _mm_set_ps1(1);
-
-    __m128 aVal, cVal;
-    for (; number < quarterPoints; number++) {
-
-        aVal = _mm_load_ps(aPtr);
-        signMask = _mm_cmplt_ps(aVal, zeroValue);
-        negatedValues = _mm_sub_ps(zeroValue, aVal);
-        aVal =
-            _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues));
-
-        // powf4 doesn't support negative values in the base, so we mask them off and then
-        // apply the negative after
-        cVal = powf4(aVal, vPower); // Takes each input value to the specified power
-
-        cVal = _mm_mul_ps(_mm_or_ps(_mm_andnot_ps(signMask, onesMask),
-                                    _mm_and_ps(signMask, negativeOneToPower)),
-                          cVal);
-
-        _mm_store_ps(cPtr, cVal); // Store the results back into the C container
-
-        aPtr += 4;
-        cPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-    for (; number < num_points; number++) {
-        *cPtr++ = powf((*aPtr++), power);
-    }
-}
-
-#endif /* LV_HAVE_SSE */
-
-
 #ifdef LV_HAVE_GENERIC
 
 static inline void volk_32f_s32f_power_32f_generic(float* cVector,
diff --git a/kernels/volk/volk_32fc_s32f_power_32fc.h b/kernels/volk/volk_32fc_s32f_power_32fc.h
index 0ac2c6cdc..96f35e788 100644
--- a/kernels/volk/volk_32fc_s32f_power_32fc.h
+++ b/kernels/volk/volk_32fc_s32f_power_32fc.h
@@ -56,87 +56,6 @@ static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp,
     return mag * lv_cmake(-cosf(arg), sinf(arg));
 }
 
-#ifdef LV_HAVE_SSE
-#include <xmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector,
-                                                   const lv_32fc_t* aVector,
-                                                   const float power,
-                                                   unsigned int num_points)
-{
-    unsigned int number = 0;
-
-    lv_32fc_t* cPtr = cVector;
-    const lv_32fc_t* aPtr = aVector;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-    const unsigned int quarterPoints = num_points / 4;
-    __m128 vPower = _mm_set_ps1(power);
-
-    __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue;
-    for (; number < quarterPoints; number++) {
-
-        cplxValue1 = _mm_load_ps((float*)aPtr);
-        aPtr += 2;
-
-        cplxValue2 = _mm_load_ps((float*)aPtr);
-        aPtr += 2;
-
-        // Convert to polar coordinates
-
-        // Arrange in i1i2i3i4 format
-        iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
-        // Arrange in q1q2q3q4 format
-        qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
-
-        phase = atan2f4(qValue, iValue); // Calculate the Phase
-
-        magnitude = _mm_sqrt_ps(
-            _mm_add_ps(_mm_mul_ps(iValue, iValue),
-                       _mm_mul_ps(qValue, qValue))); // Calculate the magnitude by square
-                                                     // rooting the added I2 and Q2 values
-
-        // Now calculate the power of the polar coordinate data
-        magnitude = powf4(magnitude, vPower); // Take the magnitude to the specified power
-
-        phase = _mm_mul_ps(phase, vPower); // Multiply the phase by the specified power
-
-        // Convert back to cartesian coordinates
-        iValue = _mm_mul_ps(cosf4(phase),
-                            magnitude); // Multiply the cos of the phase by the magnitude
-        qValue = _mm_mul_ps(sinf4(phase),
-                            magnitude); // Multiply the sin of the phase by the magnitude
-
-        cplxValue1 =
-            _mm_unpacklo_ps(iValue, qValue); // Interleave the lower two i & q values
-        cplxValue2 =
-            _mm_unpackhi_ps(iValue, qValue); // Interleave the upper two i & q values
-
-        _mm_store_ps((float*)cPtr,
-                     cplxValue1); // Store the results back into the C container
-
-        cPtr += 2;
-
-        _mm_store_ps((float*)cPtr,
-                     cplxValue2); // Store the results back into the C container
-
-        cPtr += 2;
-    }
-
-    number = quarterPoints * 4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-    for (; number < num_points; number++) {
-        *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power);
-    }
-}
-#endif /* LV_HAVE_SSE */
-
-
 #ifdef LV_HAVE_GENERIC
 
 static inline void volk_32fc_s32f_power_32fc_generic(lv_32fc_t* cVector,
diff --git a/kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h b/kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h
index 127a512a1..e36a9ae21 100644
--- a/kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h
+++ b/kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h
@@ -15,36 +15,6 @@
 #include <volk/volk_32fc_s32f_x2_power_spectral_density_32f.h>
 
 
-#ifdef LV_HAVE_AVX
-
-static inline void
-volk_32fc_s32f_power_spectral_densitypuppet_32f_a_avx(float* logPowerOutput,
-                                                      const lv_32fc_t* complexFFTInput,
-                                                      const float normalizationFactor,
-                                                      unsigned int num_points)
-{
-    volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(
-        logPowerOutput, complexFFTInput, normalizationFactor, 2.5, num_points);
-}
-
-#endif /* LV_HAVE_AVX */
-
-
-#ifdef LV_HAVE_SSE3
-
-static inline void
-volk_32fc_s32f_power_spectral_densitypuppet_32f_a_sse3(float* logPowerOutput,
-                                                       const lv_32fc_t* complexFFTInput,
-                                                       const float normalizationFactor,
-                                                       unsigned int num_points)
-{
-    volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(
-        logPowerOutput, complexFFTInput, normalizationFactor, 2.5, num_points);
-}
-
-#endif /* LV_HAVE_SSE3 */
-
-
 #ifdef LV_HAVE_GENERIC
 
 static inline void
diff --git a/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h b/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h
index 5bc158e45..be9aa88a4 100644
--- a/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h
+++ b/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h
@@ -90,86 +90,6 @@ volk_32fc_s32f_power_spectrum_32f_generic(float* logPowerOutput,
 }
 #endif /* LV_HAVE_GENERIC */
 
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-static inline void
-volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput,
-                                         const lv_32fc_t* complexFFTInput,
-                                         const float normalizationFactor,
-                                         unsigned int num_points)
-{
-    const float* inputPtr = (const float*)complexFFTInput;
-    float* destPtr = logPowerOutput;
-    uint64_t number = 0;
-    const float iNormalizationFactor = 1.0 / normalizationFactor;
-#ifdef LV_HAVE_LIB_SIMDMATH
-    __m128 magScalar = _mm_set_ps1(10.0);
-    magScalar = _mm_div_ps(magScalar, logf4(magScalar));
-
-    __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
-
-    __m128 power;
-    __m128 input1, input2;
-    const uint64_t quarterPoints = num_points / 4;
-    for (; number < quarterPoints; number++) {
-        // Load the complex values
-        input1 = _mm_load_ps(inputPtr);
-        inputPtr += 4;
-        input2 = _mm_load_ps(inputPtr);
-        inputPtr += 4;
-
-        // Apply the normalization factor
-        input1 = _mm_mul_ps(input1, invNormalizationFactor);
-        input2 = _mm_mul_ps(input2, invNormalizationFactor);
-
-        // Multiply each value by itself
-        // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
-        input1 = _mm_mul_ps(input1, input1);
-        // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
-        input2 = _mm_mul_ps(input2, input2);
-
-        // Horizontal add, to add (r*r) + (i*i) for each complex value
-        // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
-        power = _mm_hadd_ps(input1, input2);
-
-        // Calculate the natural log power
-        power = logf4(power);
-
-        // Convert to log10 and multiply by 10.0
-        power = _mm_mul_ps(power, magScalar);
-
-        // Store the floating point results
-        _mm_store_ps(destPtr, power);
-
-        destPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-    // Calculate the FFT for any remaining points
-
-    for (; number < num_points; number++) {
-        // Calculate dBm
-        // 50 ohm load assumption
-        // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
-        // 75 ohm load assumption
-        // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-
-        const float real = *inputPtr++ * iNormalizationFactor;
-        const float imag = *inputPtr++ * iNormalizationFactor;
-
-        *destPtr = volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
-
-        destPtr++;
-    }
-}
-#endif /* LV_HAVE_SSE3 */
-
 #ifdef LV_HAVE_NEON
 #include <arm_neon.h>
 #include <volk/volk_neon_intrinsics.h>
diff --git a/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h b/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h
index 4d75c9dbf..bdc66b864 100644
--- a/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h
+++ b/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h
@@ -46,185 +46,6 @@
 #include <math.h>
 #include <stdio.h>
 
-#ifdef LV_HAVE_AVX
-#include <immintrin.h>
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-static inline void
-volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float* logPowerOutput,
-                                                   const lv_32fc_t* complexFFTInput,
-                                                   const float normalizationFactor,
-                                                   const float rbw,
-                                                   unsigned int num_points)
-{
-    const float* inputPtr = (const float*)complexFFTInput;
-    float* destPtr = logPowerOutput;
-    uint64_t number = 0;
-    const float iRBW = 1.0 / rbw;
-    const float iNormalizationFactor = 1.0 / normalizationFactor;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-    __m256 magScalar = _mm256_set1_ps(10.0);
-    magScalar = _mm256_div_ps(magScalar, logf4(magScalar));
-
-    __m256 invRBW = _mm256_set1_ps(iRBW);
-
-    __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor);
-
-    __m256 power;
-    __m256 input1, input2;
-    const uint64_t eighthPoints = num_points / 8;
-    for (; number < eighthPoints; number++) {
-        // Load the complex values
-        input1 = _mm256_load_ps(inputPtr);
-        inputPtr += 8;
-        input2 = _mm256_load_ps(inputPtr);
-        inputPtr += 8;
-
-        // Apply the normalization factor
-        input1 = _mm256_mul_ps(input1, invNormalizationFactor);
-        input2 = _mm256_mul_ps(input2, invNormalizationFactor);
-
-        // Multiply each value by itself
-        // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
-        input1 = _mm256_mul_ps(input1, input1);
-        // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
-        input2 = _mm256_mul_ps(input2, input2);
-
-        // Horizontal add, to add (r*r) + (i*i) for each complex value
-        // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
-        inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20);
-        inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31);
-
-        power = _mm256_hadd_ps(inputVal1, inputVal2);
-
-        // Divide by the rbw
-        power = _mm256_mul_ps(power, invRBW);
-
-        // Calculate the natural log power
-        power = logf4(power);
-
-        // Convert to log10 and multiply by 10.0
-        power = _mm256_mul_ps(power, magScalar);
-
-        // Store the floating point results
-        _mm256_store_ps(destPtr, power);
-
-        destPtr += 8;
-    }
-
-    number = eighthPoints * 8;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-    // Calculate the FFT for any remaining points
-    for (; number < num_points; number++) {
-        // Calculate dBm
-        // 50 ohm load assumption
-        // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
-        // 75 ohm load assumption
-        // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-
-        const float real = *inputPtr++ * iNormalizationFactor;
-        const float imag = *inputPtr++ * iNormalizationFactor;
-
-        *destPtr = volk_log2to10factor *
-                   log2f_non_ieee((((real * real) + (imag * imag))) * iRBW);
-        destPtr++;
-    }
-}
-#endif /* LV_HAVE_AVX */
-
-#ifdef LV_HAVE_SSE3
-#include <pmmintrin.h>
-
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-#include <simdmath.h>
-#endif /* LV_HAVE_LIB_SIMDMATH */
-
-static inline void
-volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* logPowerOutput,
-                                                    const lv_32fc_t* complexFFTInput,
-                                                    const float normalizationFactor,
-                                                    const float rbw,
-                                                    unsigned int num_points)
-{
-    const float* inputPtr = (const float*)complexFFTInput;
-    float* destPtr = logPowerOutput;
-    uint64_t number = 0;
-    const float iRBW = 1.0 / rbw;
-    const float iNormalizationFactor = 1.0 / normalizationFactor;
-
-#ifdef LV_HAVE_LIB_SIMDMATH
-    __m128 magScalar = _mm_set_ps1(10.0);
-    magScalar = _mm_div_ps(magScalar, logf4(magScalar));
-
-    __m128 invRBW = _mm_set_ps1(iRBW);
-
-    __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor);
-
-    __m128 power;
-    __m128 input1, input2;
-    const uint64_t quarterPoints = num_points / 4;
-    for (; number < quarterPoints; number++) {
-        // Load the complex values
-        input1 = _mm_load_ps(inputPtr);
-        inputPtr += 4;
-        input2 = _mm_load_ps(inputPtr);
-        inputPtr += 4;
-
-        // Apply the normalization factor
-        input1 = _mm_mul_ps(input1, invNormalizationFactor);
-        input2 = _mm_mul_ps(input2, invNormalizationFactor);
-
-        // Multiply each value by itself
-        // (r1*r1), (i1*i1), (r2*r2), (i2*i2)
-        input1 = _mm_mul_ps(input1, input1);
-        // (r3*r3), (i3*i3), (r4*r4), (i4*i4)
-        input2 = _mm_mul_ps(input2, input2);
-
-        // Horizontal add, to add (r*r) + (i*i) for each complex value
-        // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4)
-        power = _mm_hadd_ps(input1, input2);
-
-        // Divide by the rbw
-        power = _mm_mul_ps(power, invRBW);
-
-        // Calculate the natural log power
-        power = logf4(power);
-
-        // Convert to log10 and multiply by 10.0
-        power = _mm_mul_ps(power, magScalar);
-
-        // Store the floating point results
-        _mm_store_ps(destPtr, power);
-
-        destPtr += 4;
-    }
-
-    number = quarterPoints * 4;
-#endif /* LV_HAVE_LIB_SIMDMATH */
-    // Calculate the FFT for any remaining points
-    for (; number < num_points; number++) {
-        // Calculate dBm
-        // 50 ohm load assumption
-        // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
-        // 75 ohm load assumption
-        // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
-
-        const float real = *inputPtr++ * iNormalizationFactor;
-        const float imag = *inputPtr++ * iNormalizationFactor;
-
-        *destPtr = volk_log2to10factor *
-                   log2f_non_ieee((((real * real) + (imag * imag))) * iRBW);
-        destPtr++;
-    }
-}
-#endif /* LV_HAVE_SSE3 */
-
-
 #ifdef LV_HAVE_GENERIC
 
 static inline void