From f726435d61e328da88f6850df232cc01b6b5a472 Mon Sep 17 00:00:00 2001
From: Alexey Slokva
Date: Sat, 23 Dec 2023 17:32:51 -0500
Subject: [PATCH] Compute the minimum over both register lanes

Signed-off-by: Clayton Smith
---
 kernels/volk/volk_8u_x4_conv_k7_r2_8u.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h b/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h
index 7b3a3c20..161b9cd4 100644
--- a/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h
+++ b/kernels/volk/volk_8u_x4_conv_k7_r2_8u.h
@@ -191,6 +191,7 @@ static inline void volk_8u_x4_conv_k7_r2_8u_avx2(unsigned char* Y,
 __m256i m5, m6;
 m5 = ((__m256i*)Y)[0];
 m5 = _mm256_min_epu8(m5, ((__m256i*)Y)[1]);
+ m5 = ((__m256i)_mm256_min_epu8(_mm256_permute2x128_si256(m5, m5, 0x21), m5));
 __m256i m7;
 m7 = _mm256_min_epu8(_mm256_srli_si256(m5, 8), m5);
 m7 = ((__m256i)_mm256_min_epu8(((__m256i)_mm256_srli_epi64(m7, 32)),
@@ -270,6 +271,7 @@ static inline void volk_8u_x4_conv_k7_r2_8u_avx2(unsigned char* Y,
 __m256i m12, m13;
 m12 = ((__m256i*)X)[0];
 m12 = _mm256_min_epu8(m12, ((__m256i*)X)[1]);
+ m12 = ((__m256i)_mm256_min_epu8(_mm256_permute2x128_si256(m12, m12, 0x21), m12));
 __m256i m14;
 m14 = _mm256_min_epu8(_mm256_srli_si256(m12, 8), m12);
 m14 = ((__m256i)_mm256_min_epu8(((__m256i)_mm256_srli_epi64(m14, 32)),