Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x86 AVX512: add support for the _mm512_alignr_epi32 instruction #1265

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ simde_avx512_families = [
'abs',
'add',
'adds',
'align',
'and',
'andnot',
'avg',
Expand Down
73 changes: 73 additions & 0 deletions simde/x86/avx512/align.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
* 2023 Michael R. Crusoe <crusoe@debian.org>
Comment on lines +24 to +26
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* 2020 Evan Nemerson <evan@nemerson.com>
* 2020 Christopher Moore <moore@free.fr>
* 2023 Michael R. Crusoe <crusoe@debian.org>
* 2025 Fabian Penezic <fabian.penezic@fer.hr>

When making a new file, you should only list yourself; thanks!

*/

#if !defined(SIMDE_X86_AVX512_ALIGN_H)
#define SIMDE_X86_AVX512_ALIGN_H

#include "types.h"
#include "../avx2.h"
#include "mov.h"
#include "extract.h"

#include <stdio.h>
Comment on lines +33 to +37
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#include "../avx2.h"
#include "mov.h"
#include "extract.h"
#include <stdio.h>
#include "setzero.h"


HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_


SIMDE_FUNCTION_ATTRIBUTES
simde__m512i
simde_mm512_alignr_epi32 (simde__m512i a, simde__m512i b, const int imm8){
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
simde_mm512_alignr_epi32 (simde__m512i a, simde__m512i b, const int imm8){
simde_mm512_alignr_epi32 (simde__m512i a, simde__m512i b, const int imm8)
SIMDE_REQUIRE_CONSTANT_RANGE(count, 0, 255) {

simde__m512i_private
a_p = simde__m512i_to_private(a),
b_p = simde__m512i_to_private(b),
r_p;
Comment on lines +48 to +50
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
a_p = simde__m512i_to_private(a),
b_p = simde__m512i_to_private(b),
r_p;
a_ = simde__m512i_to_private(a),
b_ = simde__m512i_to_private(b),
r_;

To be consistent with the other SIMDe code, we name the private versions of variables with just a _ suffix


Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (HEDLEY_UNLIKELY(count > 31))
return simde_mm512_setzero_epi32();

/* Number of 32-bit lanes that fit in the private vector representation. */
size_t len = sizeof(a_p)/sizeof(a_p.i32[0]);

/* Only the low four bits of imm8 are used as the lane shift. The lowest
 * (imm8 & 0xF) lanes of a become the highest lanes of the result. */
for (size_t i = 0; i < (0xF & imm8); i++) {
r_p.i32[len-i-1] = a_p.i32[(0xF & imm8) - i - 1];
}

/* The remaining low lanes of the result are filled from the highest lanes
 * of b, walking both vectors downward; j counts the lanes taken from b. */
for (size_t i = (imm8 & 0xF), j=0; i < len; i++) {
r_p.i32[len - i - 1] = b_p.i32[len - j - 1];
j++;
}
Comment on lines +52 to +61
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please review how simde_mm256_alignr_epi8 was implemented. Please try to match the same style and include the SIMDE_VECTORIZE macro.


return simde__m512i_from_private(r_p);
}

/* When SIMDE_X86_AVX512F_NATIVE is defined, route calls straight to the
 * _mm512_alignr_epi32 intrinsic instead of the portable function above.
 * The expansion deliberately has no trailing semicolon so that the macro
 * can be used anywhere an expression is allowed (e.g. as an argument). */
#if defined(SIMDE_X86_AVX512F_NATIVE)
  #define simde_mm512_alignr_epi32(a, b, imm8) _mm512_alignr_epi32((a), (b), (imm8))
#endif

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_alignr_epi32
#define _mm512_alignr_epi32(a, b, count) simde_mm512_alignr_epi32((a), (b), (count))
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_AVX512_ALIGN_H) */
78 changes: 78 additions & 0 deletions test/x86/avx512/align.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#define SIMDE_TEST_X86_AVX512_INSN align

#include <test/x86/avx512/test-avx512.h>
#include <simde/x86/avx512/set.h>
#include <simde/x86/avx512/setzero.h>
#include <simde/x86/avx512/align.h>
#include <stdio.h>

/* Checks simde_mm512_alignr_epi32 against three hand-written vectors.
 *
 * Each entry holds the two inputs (a, b), the immediate, and the expected
 * result r. The immediate is stored as an int to match the `const int imm8`
 * parameter of the function under test (it is a lane count, not a mask).
 *
 * Returns 0 when every assertion passes. */
static int
test_simde_mm512_alignr_epi32(SIMDE_MUNIT_TEST_ARGS) {
  const struct {
    simde__m512i a;
    simde__m512i b;
    const int imm8;
    simde__m512i r;
  } test_vec[] = {
    /* Shift by one lane: the lowest lane of a ends up in the top lane. */
    { simde_mm512_set_epi32(INT32_C(168), INT32_C( 54), INT32_C( 25), INT32_C( 16),
                            INT32_C( 42), INT32_C( 64), INT32_C(892), INT32_C( 79),
                            INT32_C( 35), INT32_C( 14), INT32_C(522), INT32_C( 49),
                            INT32_C( 42), INT32_C( 64), INT32_C(  7), INT32_C( 19)),
      simde_mm512_set_epi32(INT32_C(  1), INT32_C(  2), INT32_C(  3), INT32_C(  4),
                            INT32_C(  5), INT32_C(  6), INT32_C(  7), INT32_C(  8),
                            INT32_C(  9), INT32_C( 10), INT32_C( 11), INT32_C( 12),
                            INT32_C( 13), INT32_C( 14), INT32_C( 15), INT32_C( 16)),
      1,
      simde_mm512_set_epi32(INT32_C( 19), INT32_C(  1), INT32_C(  2), INT32_C(  3),
                            INT32_C(  4), INT32_C(  5), INT32_C(  6), INT32_C(  7),
                            INT32_C(  8), INT32_C(  9), INT32_C( 10), INT32_C( 11),
                            INT32_C( 12), INT32_C( 13), INT32_C( 14), INT32_C( 15)),
    },
    /* Same inputs with imm8 = 17: only the low four bits of the immediate
     * are used, so the expected result is identical to the imm8 = 1 case. */
    { simde_mm512_set_epi32(INT32_C(168), INT32_C( 54), INT32_C( 25), INT32_C( 16),
                            INT32_C( 42), INT32_C( 64), INT32_C(892), INT32_C( 79),
                            INT32_C( 35), INT32_C( 14), INT32_C(522), INT32_C( 49),
                            INT32_C( 42), INT32_C( 64), INT32_C(  7), INT32_C( 19)),
      simde_mm512_set_epi32(INT32_C(  1), INT32_C(  2), INT32_C(  3), INT32_C(  4),
                            INT32_C(  5), INT32_C(  6), INT32_C(  7), INT32_C(  8),
                            INT32_C(  9), INT32_C( 10), INT32_C( 11), INT32_C( 12),
                            INT32_C( 13), INT32_C( 14), INT32_C( 15), INT32_C( 16)),
      17,
      simde_mm512_set_epi32(INT32_C( 19), INT32_C(  1), INT32_C(  2), INT32_C(  3),
                            INT32_C(  4), INT32_C(  5), INT32_C(  6), INT32_C(  7),
                            INT32_C(  8), INT32_C(  9), INT32_C( 10), INT32_C( 11),
                            INT32_C( 12), INT32_C( 13), INT32_C( 14), INT32_C( 15)),
    },
    /* Shift by four lanes: the four lowest lanes of a fill the top of r. */
    { simde_mm512_set_epi32(INT32_C(8642), INT32_C(7802), INT32_C(1252), INT32_C(1585),
                            INT32_C(3509), INT32_C(5362), INT32_C(8605), INT32_C(5927),
                            INT32_C(6701), INT32_C(3014), INT32_C(2816), INT32_C(2818),
                            INT32_C(6544), INT32_C(9829), INT32_C(7991), INT32_C(1111)),
      simde_mm512_set_epi32(INT32_C(4513), INT32_C(3854), INT32_C(4402), INT32_C(6551),
                            INT32_C(5642), INT32_C(3525), INT32_C(3873), INT32_C(5884),
                            INT32_C(3443), INT32_C(1354), INT32_C(1070), INT32_C(5500),
                            INT32_C(2349), INT32_C(1754), INT32_C(8819), INT32_C( 716)),
      4,
      simde_mm512_set_epi32(INT32_C(6544), INT32_C(9829), INT32_C(7991), INT32_C(1111),
                            INT32_C(4513), INT32_C(3854), INT32_C(4402), INT32_C(6551),
                            INT32_C(5642), INT32_C(3525), INT32_C(3873), INT32_C(5884),
                            INT32_C(3443), INT32_C(1354), INT32_C(1070), INT32_C(5500)),
    }
  };

  /* The immediates are written as literals rather than read from
   * test_vec[i].imm8 -- presumably because the native intrinsic needs a
   * compile-time constant; keep them in sync with the table above. */
  simde__m512i r = simde_mm512_alignr_epi32(test_vec[0].a, test_vec[0].b, 1);
  simde_assert_m512i_i32(r, ==, test_vec[0].r);

  r = simde_mm512_alignr_epi32(test_vec[1].a, test_vec[1].b, 17);
  simde_assert_m512i_i32(r, ==, test_vec[1].r);

  r = simde_mm512_alignr_epi32(test_vec[2].a, test_vec[2].b, 4);
  simde_assert_m512i_i32(r, ==, test_vec[2].r);

  return 0;
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for including a test! Did you generate the test value on an AVX512F-capable CPU?

Please include the code that you used to generate the test values:

#else
fputc('\n', stdout);
for (int i = 0; i < 8; i++) {
simde__m512i r, a = simde_test_x86_random_i32x16();
int32_t imm8 = simde_test_codegen_random_i8() & 3;
SIMDE_CONSTIFY_4_(simde_mm512_shuffle_epi32, r, (HEDLEY_UNREACHABLE(), a), imm8, a);
simde_test_x86_write_i32x16(2, a, SIMDE_TEST_VEC_POS_FIRST);
simde_test_codegen_write_i32(2, imm8, SIMDE_TEST_VEC_POS_MIDDLE);
simde_test_x86_write_i32x16(2, r, SIMDE_TEST_VEC_POS_LAST);
}
return 1;
#endif


SIMDE_TEST_FUNC_LIST_BEGIN
SIMDE_TEST_FUNC_LIST_ENTRY(mm512_alignr_epi32)
SIMDE_TEST_FUNC_LIST_END

#include <test/x86/avx512/test-avx512-footer.h>
Loading