blob: 1d29329a2a98273365921a4dc64571553bd94f19 [file] [log] [blame]
/*
* Test load and store instructions.
* Here we check for _mm512_[mask|maskz]_[loadu|storeu] intrinsics.
*/
#include "m512_test_util.h"
/* Source data for the tests; init() sets s32 element j of entry i to 16 * i + j. */
V512 src_vals[2];
/* Vector whose 16 s32 elements are all set to -1 by init(). */
V512 all_ones;
/*
 * Always zero. It is added to mask values in the tests; presumably this keeps
 * the compiler from treating those masks as compile-time constants.
 */
volatile int vol0 = 0;
/*
 * Fill the global test vectors: every 32-bit element of src_vals gets a
 * distinct value (16 * entry + lane), and every 32-bit element of all_ones
 * is set to -1.
 */
void NOINLINE init() {
  volatile int idx;
  int lane;

  /* Walk the entries of src_vals, numbering the lanes consecutively. */
  idx = 0;
  while (idx < sizeof(src_vals) / sizeof(src_vals[0])) {
    for (lane = 0; lane != 16; ++lane) {
      src_vals[idx].s32[lane] = lane + 16 * idx;
    }
    idx++;
  }

  /* Set all sixteen 32-bit lanes of all_ones to -1. */
  idx = 0;
  while (idx < 16) {
    all_ones.s32[idx] = -1;
    idx++;
  }
}
/*
 * Exercise the _mm512_[mask|maskz]_loadu_* intrinsics.
 *
 * Each case loads a vector starting at an element offset inside src_vals,
 * builds the expected vector with scalar code, and hands both to
 * check_equal_nd(). Merge-masked cases start both vectors from all_ones and
 * zero-masked cases start the expected vector from zero, so the lanes the
 * mask leaves out take part in the comparison as well.
 */
void NOINLINE do_loadu() {
  V512 res;
  V512 expected;
  __mmask64 k64 = 0xfbde79feffeeffee;
  __mmask32 k32 = 0xbfde79fe;
  __mmask16 k16 = 0xbfde;
  __mmask8 k8 = 0xaf;
  volatile int i;

  /* Scalar views of the source data, one per element width. */
  signed char *p8 = &src_vals[0].s8[0];
  short *p16 = &src_vals[0].s16[0];
  int *p = &src_vals[0].s32[0];
  __int64 *p64 = &src_vals[0].s64[0];

  /* Unmasked loads. */
  res.zmm = _mm512_loadu_ps(&src_vals[0].s32[1]);
  for (i = 0; i < 16; i++) {
    expected.s32[i] = p[i + 1];
  }
  check_equal_nd(&res, &expected, 16, "_mm512_loadu_ps", __LINE__);

  res.zmmd = _mm512_loadu_pd(&src_vals[0].s32[2]);
  for (i = 0; i < 16; i++) {
    expected.s32[i] = p[i + 2];
  }
  check_equal_nd(&res, &expected, 16, "_mm512_loadu_pd", __LINE__);

  res.zmmi = _mm512_loadu_si512(&src_vals[0].s32[3]);
  for (i = 0; i < 16; i++) {
    expected.s32[i] = p[i + 3];
  }
  check_equal_nd(&res, &expected, 16, "_mm512_loadu_si512", __LINE__);

  /* Now the write-masked versions. */
  res = all_ones;
  expected = all_ones;
  res.zmm = _mm512_mask_loadu_ps(res.zmm, k16, &src_vals[0].s32[5]);
  for (i = 0; i < 16; i++) {
    if ((1 << i) & k16) {
      expected.s32[i] = p[i + 5];
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_ps", __LINE__);

  /*
   * vol0 is a volatile zero, so adding it leaves the mask value unchanged
   * (presumably it keeps the compiler from folding the mask to a constant).
   */
  k64 += vol0;
  res = all_ones;
  expected = all_ones;
  res.zmmi = _mm512_mask_loadu_epi8(res.zmmi, k64, &src_vals[0].s8[7]);
  for (i = 0; i < 64; i++) {
    if (((__mmask64)1 << i) & k64) {
      expected.s8[i] = p8[i + 7];
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_epi8", __LINE__);

  k64 += vol0;
  res = all_ones;
  expected.zmmi = _mm512_setzero_epi32();
  res.zmmi = _mm512_maskz_loadu_epi8(k64, &src_vals[0].s8[9]);
  for (i = 0; i < 64; i++) {
    if (((__mmask64)1 << i) & k64) {
      expected.s8[i] = p8[i + 9];
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_loadu_epi8", __LINE__);

  k32 += vol0;
  res = all_ones;
  expected = all_ones;
  res.zmmi = _mm512_mask_loadu_epi16(res.zmmi, k32, &src_vals[0].s16[5]);
  for (i = 0; i < 32; i++) {
    /*
     * Build the lane bit in the mask's own type: with a plain int,
     * 1 << 31 would overflow, which is undefined behaviour.
     */
    if (((__mmask32)1 << i) & k32) {
      expected.s16[i] = p16[i + 5];
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_epi16", __LINE__);

  k32 += vol0;
  res = all_ones;
  expected.zmmi = _mm512_setzero_epi32();
  res.zmmi = _mm512_maskz_loadu_epi16(k32, &src_vals[0].s16[3]);
  for (i = 0; i < 32; i++) {
    /* Same as above: shift in __mmask32 so lane 31 is well defined. */
    if (((__mmask32)1 << i) & k32) {
      expected.s16[i] = p16[i + 3];
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_maskz_loadu_epi16", __LINE__);

  k16 = 0xabcd + vol0;
  res = all_ones;
  expected = all_ones;
  res.zmmi = _mm512_mask_loadu_epi32(res.zmmi, k16, &src_vals[0].s32[7]);
  for (i = 0; i < 16; i++) {
    if ((1 << i) & k16) {
      expected.s32[i] = p[i + 7];
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_epi32", __LINE__);

  res = all_ones;
  expected = all_ones;
  res.zmmd = _mm512_mask_loadu_pd(res.zmmd, k8, &src_vals[0].s64[2]);
  for (i = 0; i < 8; i++) {
    if ((1 << i) & k8) {
      expected.s64[i] = p64[i + 2];
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_pd", __LINE__);

  k8 = 0x79 + vol0;
  res = all_ones;
  expected = all_ones;
  res.zmmi = _mm512_mask_loadu_epi64(res.zmmi, k8, &src_vals[0].s64[3]);
  for (i = 0; i < 8; i++) {
    if ((1 << i) & k8) {
      expected.s64[i] = p64[i + 3];
    }
  }
  check_equal_nd(&res, &expected, 16, "_mm512_mask_loadu_epi64", __LINE__);
}
/*
 * Exercise the _mm512_[mask]_storeu_* intrinsics.
 *
 * Each case resets dst_vals, stores src (a copy of src_vals[0]) at an
 * element offset inside dst_vals, and passes the memory starting at the
 * store address to check_equal_nd() together with the expected contents.
 * For the masked stores dst_vals is preset to all_ones and the expected
 * vector takes src's value only in the lanes selected by the mask.
 */
void NOINLINE do_storeu() {
  V512 src;
  V512 expected;
  volatile int i;
  static V512 dst_vals[2];
  __mmask64 k64 = 0xabcdffffffffeebd;
  __mmask32 k32 = 0xfefebdbd;
  __mmask16 k16 = 0x79ab;
  __mmask8 k8 = 0xea;

  src.zmmi = src_vals[0].zmmi;

  /* Unmasked stores: the destination must match src_vals[0]. */
  dst_vals[0].zmm = _mm512_setzero_ps();
  dst_vals[1].zmm = _mm512_setzero_ps();
  _mm512_storeu_si512(&dst_vals[0].s32[1], src.zmmi);
  check_equal_nd(&dst_vals[0].s32[1], &src_vals, 16, "_mm512_storeu_si512",
                 __LINE__);

  dst_vals[0].zmm = _mm512_setzero_ps();
  dst_vals[1].zmm = _mm512_setzero_ps();
  _mm512_storeu_ps(&dst_vals[0].s32[2], src.zmm);
  /* Label names the intrinsic actually under test (_ps, not _pd). */
  check_equal_nd(&dst_vals[0].s32[2], &src_vals, 16, "_mm512_storeu_ps",
                 __LINE__);

  dst_vals[0].zmm = _mm512_setzero_ps();
  dst_vals[1].zmm = _mm512_setzero_ps();
  _mm512_storeu_pd(&dst_vals[0].s32[4], src.zmmd);
  check_equal_nd(&dst_vals[0].s32[4], &src_vals, 16, "_mm512_storeu_pd",
                 __LINE__);

  /* Now the write-masked versions. */
  dst_vals[0] = all_ones;
  dst_vals[1] = all_ones;
  _mm512_mask_storeu_epi8(&dst_vals[0].s8[3], k64, src.zmmi);
  expected = all_ones;
  for (i = 0; i < 64; i++) {
    if (((__mmask64)1 << i) & k64) {
      expected.s8[i] = src.s8[i];
    }
  }
  check_equal_nd(&dst_vals[0].s8[3], &expected, 16, "_mm512_mask_storeu_epi8",
                 __LINE__);

  dst_vals[0] = all_ones;
  dst_vals[1] = all_ones;
  _mm512_mask_storeu_epi16(&dst_vals[0].s16[3], k32, src.zmmi);
  expected = all_ones;
  for (i = 0; i < 32; i++) {
    if (((__mmask32)1 << i) & k32) {
      expected.s16[i] = src.s16[i];
    }
  }
  check_equal_nd(&dst_vals[0].s16[3], &expected, 16, "_mm512_mask_storeu_epi16",
                 __LINE__);

  dst_vals[0] = all_ones;
  dst_vals[1] = all_ones;
  _mm512_mask_storeu_epi32(&dst_vals[0].s32[1], k16, src.zmmi);
  expected = all_ones;
  for (i = 0; i < 16; i++) {
    if ((1 << i) & k16) {
      expected.s32[i] = src.s32[i];
    }
  }
  check_equal_nd(&dst_vals[0].s32[1], &expected, 16, "_mm512_mask_storeu_epi32",
                 __LINE__);

  /* vol0 is a volatile zero, so the mask value itself is unchanged. */
  k16 = 0xdcba + vol0;
  dst_vals[0] = all_ones;
  dst_vals[1] = all_ones;
  _mm512_mask_storeu_ps(&dst_vals[0].s32[3], k16, src.zmm);
  expected = all_ones;
  for (i = 0; i < 16; i++) {
    if ((1 << i) & k16) {
      expected.s32[i] = src.s32[i];
    }
  }
  check_equal_nd(&dst_vals[0].s32[3], &expected, 16, "_mm512_mask_storeu_ps",
                 __LINE__);

  k8 = 0xbc;
  dst_vals[0] = all_ones;
  dst_vals[1] = all_ones;
  _mm512_mask_storeu_pd(&dst_vals[0].s64[3], k8, src.zmmd);
  expected = all_ones;
  for (i = 0; i < 8; i++) {
    if ((1 << i) & k8) {
      expected.s64[i] = src.s64[i];
    }
  }
  check_equal_nd(&dst_vals[0].s64[3], &expected, 16, "_mm512_mask_storeu_pd",
                 __LINE__);

  k8 = 0xcb + vol0;
  dst_vals[0] = all_ones;
  dst_vals[1] = all_ones;
  _mm512_mask_storeu_epi64(&dst_vals[0].s64[1], k8, src.zmmi);
  expected = all_ones;
  for (i = 0; i < 8; i++) {
    if ((1 << i) & k8) {
      expected.s64[i] = src.s64[i];
    }
  }
  check_equal_nd(&dst_vals[0].s64[1], &expected, 16, "_mm512_mask_storeu_epi64",
                 __LINE__);
}
/*
 * Test driver: set up the input vectors, run the load and store tests, and
 * report the outcome. Prints "PASSED" and returns 0 when n_errs is zero;
 * otherwise prints "FAILED" and returns 1. (n_errs is defined outside this
 * file -- presumably it is bumped by check_equal_nd() on a mismatch.)
 */
int main(int argc, char *argv[]) {
  init();
  do_loadu();
  do_storeu();

  if (n_errs == 0) {
    printf("PASSED\n");
    return 0;
  }

  printf("FAILED\n");
  return 1;
}