Skip to content

Commit

Permalink
Merge pull request #734 from argilo/remove-volk-volatile
Browse files (browse the repository at this point in the history)
Remove __VOLK_VOLATILE
  • Loading branch information
jdemel authored Jan 7, 2024
2 parents: 67cf98a + fee5f67; commit c5b539e
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 104 deletions.
5 changes: 0 additions & 5 deletions include/volk/volk_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#define __VOLK_ATTR_IMPORT __declspec(dllimport)
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm
#define __VOLK_VOLATILE
#elif defined(__clang__)
// AppleClang also defines __GNUC__, so do this check first. These
// will probably be the same as for __GNUC__, but let's keep them
Expand All @@ -33,7 +32,6 @@
#define __VOLK_ATTR_INLINE __attribute__((always_inline))
#define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
#define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
#define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
#define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
Expand All @@ -43,7 +41,6 @@
#define __VOLK_ATTR_INLINE __attribute__((always_inline))
#define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
#if __GNUC__ >= 4
#define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
#define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
Expand All @@ -61,7 +58,6 @@
#define __VOLK_ATTR_IMPORT __declspec(dllimport)
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm
#define __VOLK_VOLATILE
#else
#define __VOLK_ATTR_ALIGNED(x)
#define __VOLK_ATTR_UNUSED
Expand All @@ -71,7 +67,6 @@
#define __VOLK_ATTR_IMPORT
#define __VOLK_PREFETCH(addr)
#define __VOLK_ASM __asm__
#define __VOLK_VOLATILE __volatile__
#endif

////////////////////////////////////////////////////////////////////////
Expand Down
60 changes: 0 additions & 60 deletions kernels/volk/volk_16i_x4_quad_max_star_16i.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,66 +120,6 @@ static inline void volk_16i_x4_quad_max_star_16i_a_sse2(short* target,
p_target += 1;
}


/*__VOLK_ASM __VOLK_VOLATILE
(
"volk_16i_x4_quad_max_star_16i_a_sse2_L1:\n\t"
"cmp $0, %[bound]\n\t"
"je volk_16i_x4_quad_max_star_16i_a_sse2_END\n\t"
"movaps (%[src0]), %%xmm1\n\t"
"movaps (%[src1]), %%xmm2\n\t"
"movaps (%[src2]), %%xmm3\n\t"
"movaps (%[src3]), %%xmm4\n\t"
"pxor %%xmm5, %%xmm5\n\t"
"pxor %%xmm6, %%xmm6\n\t"
"movaps %%xmm1, %%xmm7\n\t"
"movaps %%xmm3, %%xmm8\n\t"
"psubw %%xmm2, %%xmm1\n\t"
"psubw %%xmm4, %%xmm3\n\t"
"pcmpgtw %%xmm1, %%xmm5\n\t"
"pcmpgtw %%xmm3, %%xmm6\n\t"
"pand %%xmm5, %%xmm2\n\t"
"pand %%xmm6, %%xmm4\n\t"
"pandn %%xmm7, %%xmm5\n\t"
"pandn %%xmm8, %%xmm6\n\t"
"paddw %%xmm2, %%xmm5\n\t"
"paddw %%xmm4, %%xmm6\n\t"
"pxor %%xmm1, %%xmm1\n\t"
"movaps %%xmm5, %%xmm2\n\t"
"psubw %%xmm6, %%xmm5\n\t"
"add $16, %[src0]\n\t"
"add $-1, %[bound]\n\t"
"pcmpgtw %%xmm5, %%xmm1\n\t"
"add $16, %[src1]\n\t"
"pand %%xmm1, %%xmm6\n\t"
"pandn %%xmm2, %%xmm1\n\t"
"add $16, %[src2]\n\t"
"paddw %%xmm6, %%xmm1\n\t"
"add $16, %[src3]\n\t"
"movaps %%xmm1, (%[target])\n\t"
"addw $16, %[target]\n\t"
"jmp volk_16i_x4_quad_max_star_16i_a_sse2_L1\n\t"
"volk_16i_x4_quad_max_star_16i_a_sse2_END:\n\t"
:
:[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2),
[src3]"r"(src3), [target]"r"(target)
:
);
*/

short temp0 = 0;
short temp1 = 0;
for (i = bound * 8; i < (bound * 8) + leftovers; ++i) {
Expand Down
37 changes: 0 additions & 37 deletions kernels/volk/volk_16i_x5_add_quad_16i_x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,43 +116,6 @@ static inline void volk_16i_x5_add_quad_16i_x4_a_sse2(short* target0,
p_target2 += 1;
p_target3 += 1;
}
/*__VOLK_ASM __VOLK_VOLATILE
(
".%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1:\n\t"
"cmp $0, %[bound]\n\t"
"je .%=volk_16i_x5_add_quad_16i_x4_a_sse2_END\n\t"
"movaps (%[src0]), %%xmm1\n\t"
"movaps (%[src1]), %%xmm2\n\t"
"movaps (%[src2]), %%xmm3\n\t"
"movaps (%[src3]), %%xmm4\n\t"
"movaps (%[src4]), %%xmm5\n\t"
"add $16, %[src0]\n\t"
"add $16, %[src1]\n\t"
"add $16, %[src2]\n\t"
"add $16, %[src3]\n\t"
"add $16, %[src4]\n\t"
"paddw %%xmm1, %%xmm2\n\t"
"paddw %%xmm1, %%xmm3\n\t"
"paddw %%xmm1, %%xmm4\n\t"
"paddw %%xmm1, %%xmm5\n\t"
"add $-1, %[bound]\n\t"
"movaps %%xmm2, (%[target0])\n\t"
"movaps %%xmm3, (%[target1])\n\t"
"movaps %%xmm4, (%[target2])\n\t"
"movaps %%xmm5, (%[target3])\n\t"
"add $16, %[target0]\n\t"
"add $16, %[target1]\n\t"
"add $16, %[target2]\n\t"
"add $16, %[target3]\n\t"
"jmp .%=volk_16i_x5_add_quad_16i_x4_a_sse2_L1\n\t"
".%=volk_16i_x5_add_quad_16i_x4_a_sse2_END:\n\t"
:
:[bound]"r"(bound), [src0]"r"(src0), [src1]"r"(src1), [src2]"r"(src2),
[src3]"r"(src3), [src4]"r"(src4), [target0]"r"(target0), [target1]"r"(target1),
[target2]"r"(target2), [target3]"r"(target3)
:"xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
*/

for (i = bound * 8; i < (bound * 8) + leftovers; ++i) {
target0[i] = src0[i] + src1[i];
Expand Down
4 changes: 2 additions & 2 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,9 @@ check_c_source_compiles("#include <arm_neon.h>\nint main(){ uint8_t *dest; uint8

if(neon_compile_result)
set(CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/include)
check_c_source_compiles("#include <volk/volk_common.h>\n int main(){__VOLK_ASM __VOLK_VOLATILE(\"vrev32.8 q0, q0\");}"
check_c_source_compiles("#include <volk/volk_common.h>\n int main(){__VOLK_ASM(\"vrev32.8 q0, q0\");}"
have_neonv7_result )
check_c_source_compiles("#include <volk/volk_common.h>\n int main(){__VOLK_ASM __VOLK_VOLATILE(\"sub v1.4s,v1.4s,v1.4s\");}"
check_c_source_compiles("#include <volk/volk_common.h>\n int main(){__VOLK_ASM(\"sub v1.4s,v1.4s,v1.4s\");}"
have_neonv8_result )

if (NOT have_neonv7_result)
Expand Down

0 comments on commit c5b539e

Please sign in to comment.