From 4be612868985689ae05a375f895103f20a532967 Mon Sep 17 00:00:00 2001 From: Clayton Smith Date: Sun, 17 Dec 2023 10:28:27 -0500 Subject: [PATCH] Remove references to simdmath library Signed-off-by: Clayton Smith --- kernels/volk/volk_32f_s32f_power_32f.h | 117 ------------ kernels/volk/volk_32fc_s32f_power_32fc.h | 81 -------- ...fc_s32f_power_spectral_densitypuppet_32f.h | 30 --- .../volk/volk_32fc_s32f_power_spectrum_32f.h | 80 -------- ..._32fc_s32f_x2_power_spectral_density_32f.h | 179 ------------------ 5 files changed, 487 deletions(-) diff --git a/kernels/volk/volk_32f_s32f_power_32f.h b/kernels/volk/volk_32f_s32f_power_32f.h index 0e8536537..51b7b698b 100644 --- a/kernels/volk/volk_32f_s32f_power_32f.h +++ b/kernels/volk/volk_32f_s32f_power_32f.h @@ -62,123 +62,6 @@ #include #include -#ifdef LV_HAVE_SSE4_1 -#include - -#ifdef LV_HAVE_LIB_SIMDMATH -#include -#endif /* LV_HAVE_LIB_SIMDMATH */ - -static inline void volk_32f_s32f_power_32f_a_sse4_1(float* cVector, - const float* aVector, - const float power, - unsigned int num_points) -{ - unsigned int number = 0; - - float* cPtr = cVector; - const float* aPtr = aVector; - -#ifdef LV_HAVE_LIB_SIMDMATH - const unsigned int quarterPoints = num_points / 4; - __m128 vPower = _mm_set_ps1(power); - __m128 zeroValue = _mm_setzero_ps(); - __m128 signMask; - __m128 negatedValues; - __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power)); - __m128 onesMask = _mm_set_ps1(1); - - __m128 aVal, cVal; - for (; number < quarterPoints; number++) { - - aVal = _mm_load_ps(aPtr); - signMask = _mm_cmplt_ps(aVal, zeroValue); - negatedValues = _mm_sub_ps(zeroValue, aVal); - aVal = _mm_blendv_ps(aVal, negatedValues, signMask); - - // powf4 doesn't support negative values in the base, so we mask them off and then - // apply the negative after - cVal = powf4(aVal, vPower); // Takes each input value to the specified power - - cVal = _mm_mul_ps(_mm_blendv_ps(onesMask, negativeOneToPower, signMask), cVal); - - _mm_store_ps(cPtr, cVal); // Store the results back into the C container - - aPtr += 4; - cPtr += 4; - } - - number = quarterPoints * 4; -#endif /* LV_HAVE_LIB_SIMDMATH */ - - for (; number < num_points; number++) { - *cPtr++ = powf((*aPtr++), power); - } -} - -#endif /* LV_HAVE_SSE4_1 */ - - -#ifdef LV_HAVE_SSE -#include - -#ifdef LV_HAVE_LIB_SIMDMATH -#include -#endif /* LV_HAVE_LIB_SIMDMATH */ - -static inline void volk_32f_s32f_power_32f_a_sse(float* cVector, - const float* aVector, - const float power, - unsigned int num_points) -{ - unsigned int number = 0; - - float* cPtr = cVector; - const float* aPtr = aVector; - -#ifdef LV_HAVE_LIB_SIMDMATH - const unsigned int quarterPoints = num_points / 4; - __m128 vPower = _mm_set_ps1(power); - __m128 zeroValue = _mm_setzero_ps(); - __m128 signMask; - __m128 negatedValues; - __m128 negativeOneToPower = _mm_set_ps1(powf(-1, power)); - __m128 onesMask = _mm_set_ps1(1); - - __m128 aVal, cVal; - for (; number < quarterPoints; number++) { - - aVal = _mm_load_ps(aPtr); - signMask = _mm_cmplt_ps(aVal, zeroValue); - negatedValues = _mm_sub_ps(zeroValue, aVal); - aVal = - _mm_or_ps(_mm_andnot_ps(signMask, aVal), _mm_and_ps(signMask, negatedValues)); - - // powf4 doesn't support negative values in the base, so we mask them off and then - // apply the negative after - cVal = powf4(aVal, vPower); // Takes each input value to the specified power - - cVal = _mm_mul_ps(_mm_or_ps(_mm_andnot_ps(signMask, onesMask), - _mm_and_ps(signMask, negativeOneToPower)), - cVal); - - _mm_store_ps(cPtr, cVal); // Store the results back into the C container - - aPtr += 4; - cPtr += 4; - } - - number = quarterPoints * 4; -#endif /* LV_HAVE_LIB_SIMDMATH */ - - for (; number < num_points; number++) { - *cPtr++ = powf((*aPtr++), power); - } -} - -#endif /* LV_HAVE_SSE */ - - #ifdef LV_HAVE_GENERIC static inline void volk_32f_s32f_power_32f_generic(float* cVector, diff --git a/kernels/volk/volk_32fc_s32f_power_32fc.h b/kernels/volk/volk_32fc_s32f_power_32fc.h index 0ac2c6cdc..96f35e788 100644 --- a/kernels/volk/volk_32fc_s32f_power_32fc.h +++ b/kernels/volk/volk_32fc_s32f_power_32fc.h @@ -56,87 +56,6 @@ static inline lv_32fc_t __volk_s32fc_s32f_power_s32fc_a(const lv_32fc_t exp, return mag * lv_cmake(-cosf(arg), sinf(arg)); } -#ifdef LV_HAVE_SSE -#include - -#ifdef LV_HAVE_LIB_SIMDMATH -#include -#endif /* LV_HAVE_LIB_SIMDMATH */ - -static inline void volk_32fc_s32f_power_32fc_a_sse(lv_32fc_t* cVector, - const lv_32fc_t* aVector, - const float power, - unsigned int num_points) -{ - unsigned int number = 0; - - lv_32fc_t* cPtr = cVector; - const lv_32fc_t* aPtr = aVector; - -#ifdef LV_HAVE_LIB_SIMDMATH - const unsigned int quarterPoints = num_points / 4; - __m128 vPower = _mm_set_ps1(power); - - __m128 cplxValue1, cplxValue2, magnitude, phase, iValue, qValue; - for (; number < quarterPoints; number++) { - - cplxValue1 = _mm_load_ps((float*)aPtr); - aPtr += 2; - - cplxValue2 = _mm_load_ps((float*)aPtr); - aPtr += 2; - - // Convert to polar coordinates - - // Arrange in i1i2i3i4 format - iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0)); - // Arrange in q1q2q3q4 format - qValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1)); - - phase = atan2f4(qValue, iValue); // Calculate the Phase - - magnitude = _mm_sqrt_ps( - _mm_add_ps(_mm_mul_ps(iValue, iValue), - _mm_mul_ps(qValue, qValue))); // Calculate the magnitude by square - // rooting the added I2 and Q2 values - - // Now calculate the power of the polar coordinate data - magnitude = powf4(magnitude, vPower); // Take the magnitude to the specified power - - phase = _mm_mul_ps(phase, vPower); // Multiply the phase by the specified power - - // Convert back to cartesian coordinates - iValue = _mm_mul_ps(cosf4(phase), - magnitude); // Multiply the cos of the phase by the magnitude - qValue = _mm_mul_ps(sinf4(phase), - magnitude); // Multiply the sin of the phase by the magnitude - - cplxValue1 = - _mm_unpacklo_ps(iValue, qValue); // Interleave the lower two i & q values - cplxValue2 = - _mm_unpackhi_ps(iValue, qValue); // Interleave the upper two i & q values - - _mm_store_ps((float*)cPtr, - cplxValue1); // Store the results back into the C container - - cPtr += 2; - - _mm_store_ps((float*)cPtr, - cplxValue2); // Store the results back into the C container - - cPtr += 2; - } - - number = quarterPoints * 4; -#endif /* LV_HAVE_LIB_SIMDMATH */ - - for (; number < num_points; number++) { - *cPtr++ = __volk_s32fc_s32f_power_s32fc_a((*aPtr++), power); - } -} -#endif /* LV_HAVE_SSE */ - - #ifdef LV_HAVE_GENERIC static inline void volk_32fc_s32f_power_32fc_generic(lv_32fc_t* cVector, diff --git a/kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h b/kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h index 127a512a1..e36a9ae21 100644 --- a/kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h +++ b/kernels/volk/volk_32fc_s32f_power_spectral_densitypuppet_32f.h @@ -15,36 +15,6 @@ #include -#ifdef LV_HAVE_AVX - -static inline void -volk_32fc_s32f_power_spectral_densitypuppet_32f_a_avx(float* logPowerOutput, - const lv_32fc_t* complexFFTInput, - const float normalizationFactor, - unsigned int num_points) -{ - volk_32fc_s32f_x2_power_spectral_density_32f_a_avx( - logPowerOutput, complexFFTInput, normalizationFactor, 2.5, num_points); -} - -#endif /* LV_HAVE_AVX */ - - -#ifdef LV_HAVE_SSE3 - -static inline void -volk_32fc_s32f_power_spectral_densitypuppet_32f_a_sse3(float* logPowerOutput, - const lv_32fc_t* complexFFTInput, - const float normalizationFactor, - unsigned int num_points) -{ - volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3( - logPowerOutput, complexFFTInput, normalizationFactor, 2.5, num_points); -} - -#endif /* LV_HAVE_SSE3 */ - - #ifdef LV_HAVE_GENERIC static inline void diff --git a/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h b/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h index 5bc158e45..be9aa88a4 100644 --- a/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h +++ b/kernels/volk/volk_32fc_s32f_power_spectrum_32f.h @@ -90,86 +90,6 @@ volk_32fc_s32f_power_spectrum_32f_generic(float* logPowerOutput, } #endif /* LV_HAVE_GENERIC */ -#ifdef LV_HAVE_SSE3 -#include - -#ifdef LV_HAVE_LIB_SIMDMATH -#include -#endif /* LV_HAVE_LIB_SIMDMATH */ - -static inline void -volk_32fc_s32f_power_spectrum_32f_a_sse3(float* logPowerOutput, - const lv_32fc_t* complexFFTInput, - const float normalizationFactor, - unsigned int num_points) -{ - const float* inputPtr = (const float*)complexFFTInput; - float* destPtr = logPowerOutput; - uint64_t number = 0; - const float iNormalizationFactor = 1.0 / normalizationFactor; -#ifdef LV_HAVE_LIB_SIMDMATH - __m128 magScalar = _mm_set_ps1(10.0); - magScalar = _mm_div_ps(magScalar, logf4(magScalar)); - - __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor); - - __m128 power; - __m128 input1, input2; - const uint64_t quarterPoints = num_points / 4; - for (; number < quarterPoints; number++) { - // Load the complex values - input1 = _mm_load_ps(inputPtr); - inputPtr += 4; - input2 = _mm_load_ps(inputPtr); - inputPtr += 4; - - // Apply the normalization factor - input1 = _mm_mul_ps(input1, invNormalizationFactor); - input2 = _mm_mul_ps(input2, invNormalizationFactor); - - // Multiply each value by itself - // (r1*r1), (i1*i1), (r2*r2), (i2*i2) - input1 = _mm_mul_ps(input1, input1); - // (r3*r3), (i3*i3), (r4*r4), (i4*i4) - input2 = _mm_mul_ps(input2, input2); - - // Horizontal add, to add (r*r) + (i*i) for each complex value - // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4) - power = _mm_hadd_ps(input1, input2); - - // Calculate the natural log power - power = logf4(power); - - // Convert to log10 and multiply by 10.0 - power = _mm_mul_ps(power, magScalar); - - // Store the floating point results - _mm_store_ps(destPtr, power); - - destPtr += 4; - } - - number = quarterPoints * 4; -#endif /* LV_HAVE_LIB_SIMDMATH */ - // Calculate the FFT for any remaining points - - for (; number < num_points; number++) { - // Calculate dBm - // 50 ohm load assumption - // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10) - // 75 ohm load assumption - // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15) - - const float real = *inputPtr++ * iNormalizationFactor; - const float imag = *inputPtr++ * iNormalizationFactor; - - *destPtr = volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag))); - - destPtr++; - } -} -#endif /* LV_HAVE_SSE3 */ - #ifdef LV_HAVE_NEON #include #include diff --git a/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h b/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h index 4d75c9dbf..bdc66b864 100644 --- a/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h +++ b/kernels/volk/volk_32fc_s32f_x2_power_spectral_density_32f.h @@ -46,185 +46,6 @@ #include #include -#ifdef LV_HAVE_AVX -#include - -#ifdef LV_HAVE_LIB_SIMDMATH -#include -#endif /* LV_HAVE_LIB_SIMDMATH */ - -static inline void -volk_32fc_s32f_x2_power_spectral_density_32f_a_avx(float* logPowerOutput, - const lv_32fc_t* complexFFTInput, - const float normalizationFactor, - const float rbw, - unsigned int num_points) -{ - const float* inputPtr = (const float*)complexFFTInput; - float* destPtr = logPowerOutput; - uint64_t number = 0; - const float iRBW = 1.0 / rbw; - const float iNormalizationFactor = 1.0 / normalizationFactor; - -#ifdef LV_HAVE_LIB_SIMDMATH - __m256 magScalar = _mm256_set1_ps(10.0); - magScalar = _mm256_div_ps(magScalar, logf4(magScalar)); - - __m256 invRBW = _mm256_set1_ps(iRBW); - - __m256 invNormalizationFactor = _mm256_set1_ps(iNormalizationFactor); - - __m256 power; - __m256 input1, input2; - const uint64_t eighthPoints = num_points / 8; - for (; number < eighthPoints; number++) { - // Load the complex values - input1 = _mm256_load_ps(inputPtr); - inputPtr += 8; - input2 = _mm256_load_ps(inputPtr); - inputPtr += 8; - - // Apply the normalization factor - input1 = _mm256_mul_ps(input1, invNormalizationFactor); - input2 = _mm256_mul_ps(input2, invNormalizationFactor); - - // Multiply each value by itself - // (r1*r1), (i1*i1), (r2*r2), (i2*i2) - input1 = _mm256_mul_ps(input1, input1); - // (r3*r3), (i3*i3), (r4*r4), (i4*i4) - input2 = _mm256_mul_ps(input2, input2); - - // Horizontal add, to add (r*r) + (i*i) for each complex value - // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4) - inputVal1 = _mm256_permute2f128_ps(input1, input2, 0x20); - inputVal2 = _mm256_permute2f128_ps(input1, input2, 0x31); - - power = _mm256_hadd_ps(inputVal1, inputVal2); - - // Divide by the rbw - power = _mm256_mul_ps(power, invRBW); - - // Calculate the natural log power - power = logf4(power); - - // Convert to log10 and multiply by 10.0 - power = _mm256_mul_ps(power, magScalar); - - // Store the floating point results - _mm256_store_ps(destPtr, power); - - destPtr += 8; - } - - number = eighthPoints * 8; -#endif /* LV_HAVE_LIB_SIMDMATH */ - // Calculate the FFT for any remaining points - for (; number < num_points; number++) { - // Calculate dBm - // 50 ohm load assumption - // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10) - // 75 ohm load assumption - // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15) - - const float real = *inputPtr++ * iNormalizationFactor; - const float imag = *inputPtr++ * iNormalizationFactor; - - *destPtr = volk_log2to10factor * - log2f_non_ieee((((real * real) + (imag * imag))) * iRBW); - destPtr++; - } -} -#endif /* LV_HAVE_AVX */ - -#ifdef LV_HAVE_SSE3 -#include - - -#ifdef LV_HAVE_LIB_SIMDMATH -#include -#endif /* LV_HAVE_LIB_SIMDMATH */ - -static inline void -volk_32fc_s32f_x2_power_spectral_density_32f_a_sse3(float* logPowerOutput, - const lv_32fc_t* complexFFTInput, - const float normalizationFactor, - const float rbw, - unsigned int num_points) -{ - const float* inputPtr = (const float*)complexFFTInput; - float* destPtr = logPowerOutput; - uint64_t number = 0; - const float iRBW = 1.0 / rbw; - const float iNormalizationFactor = 1.0 / normalizationFactor; - -#ifdef LV_HAVE_LIB_SIMDMATH - __m128 magScalar = _mm_set_ps1(10.0); - magScalar = _mm_div_ps(magScalar, logf4(magScalar)); - - __m128 invRBW = _mm_set_ps1(iRBW); - - __m128 invNormalizationFactor = _mm_set_ps1(iNormalizationFactor); - - __m128 power; - __m128 input1, input2; - const uint64_t quarterPoints = num_points / 4; - for (; number < quarterPoints; number++) { - // Load the complex values - input1 = _mm_load_ps(inputPtr); - inputPtr += 4; - input2 = _mm_load_ps(inputPtr); - inputPtr += 4; - - // Apply the normalization factor - input1 = _mm_mul_ps(input1, invNormalizationFactor); - input2 = _mm_mul_ps(input2, invNormalizationFactor); - - // Multiply each value by itself - // (r1*r1), (i1*i1), (r2*r2), (i2*i2) - input1 = _mm_mul_ps(input1, input1); - // (r3*r3), (i3*i3), (r4*r4), (i4*i4) - input2 = _mm_mul_ps(input2, input2); - - // Horizontal add, to add (r*r) + (i*i) for each complex value - // (r1*r1)+(i1*i1), (r2*r2) + (i2*i2), (r3*r3)+(i3*i3), (r4*r4)+(i4*i4) - power = _mm_hadd_ps(input1, input2); - - // Divide by the rbw - power = _mm_mul_ps(power, invRBW); - - // Calculate the natural log power - power = logf4(power); - - // Convert to log10 and multiply by 10.0 - power = _mm_mul_ps(power, magScalar); - - // Store the floating point results - _mm_store_ps(destPtr, power); - - destPtr += 4; - } - - number = quarterPoints * 4; -#endif /* LV_HAVE_LIB_SIMDMATH */ - // Calculate the FFT for any remaining points - for (; number < num_points; number++) { - // Calculate dBm - // 50 ohm load assumption - // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10) - // 75 ohm load assumption - // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15) - - const float real = *inputPtr++ * iNormalizationFactor; - const float imag = *inputPtr++ * iNormalizationFactor; - - *destPtr = volk_log2to10factor * - log2f_non_ieee((((real * real) + (imag * imag))) * iRBW); - destPtr++; - } -} -#endif /* LV_HAVE_SSE3 */ - - #ifdef LV_HAVE_GENERIC static inline void