blob: 4b6aeaaae7af20e06f82600eec2c56631cb5729a [file] [log] [blame]
/*
* Test mask_mov and maskz_mov intructions
*/
#include "m512_test_util.h"
#include <stdio.h>
/* Integer vectors used as sources (i1, i2) and results (i3, i4). */
__m512i i1, i2, i3, i4;
/* Single-precision vectors used as sources (f1, f2) and results (f3, f4). */
__m512 f1, f2, f3, f4;
/* Double-precision vectors used as sources (d1, d2) and results (d3, d4). */
__m512d d1, d2, d3, d4;
/*
 * Always zero.  set_nonzero adds it to every generated value so the test
 * data cannot be computed at compile time (prevents optimizations).
 */
volatile int vol = 0;
/*
 * Fill the 16 u32 elements of the V512 object at vp with a pattern that
 * depends on the element index and on c, making sure no element is zero.
 *
 * vp - destination, accessed as a V512
 * c  - offset added to every generated element
 */
void NOINLINE set_nonzero(void *vp, int c) {
  V512 *dst = (V512 *)vp;
  int lane = 0;
  while (lane < 16) {
    /* Adding the volatile `vol` keeps the value from being constant-folded. */
    dst->u32[lane] = 10 * lane * lane - 3 * lane + c + vol;
    if (!dst->u32[lane]) {
      /* The pattern happened to yield zero; substitute a nonzero value. */
      dst->u32[lane] = 1234;
    }
    lane++;
  }
}
/*
 * Compare the 16 s32 elements of a masked result against their expected
 * values.
 *
 * For element i the expected value comes from vexpected when bit i of mask
 * is set, and from vexpected_orig otherwise.  On the first mismatch an error
 * is printed, n_errs is incremented, and the comparison stops.
 *
 * vgot           - result under test
 * vexpected      - values expected where the mask bit is set
 * vexpected_orig - values expected where the mask bit is clear
 * mask           - one bit per element; bit i selects element i
 * banner         - label for the error message; may be NULL
 */
void NOINLINE check_equal32(void *vgot, void *vexpected, void *vexpected_orig,
                            int mask, char *banner) {
  int i;
  V512 *got = (V512 *)vgot;
  V512 *expected = (V512 *)vexpected;
  V512 *orig = (V512 *)vexpected_orig;
  for (i = 0; i < 16; i++) {
    int ans = (mask & (1 << i)) ? expected->s32[i] : orig->s32[i];
    if (got->s32[i] != ans) {
      /*
       * %08x rather than %0.8x: the 0 flag is ignored when a precision is
       * given.  The casts supply the unsigned argument that %x requires.
       */
      printf("ERROR: %s failed -- 0x%08x != 0x%08x at element [%d]\n",
             banner ? banner : "", (unsigned)got->s32[i], (unsigned)ans, i);
      n_errs++;
      break; /* report only the first mismatch */
    }
  }
}
/*
 * Compare the 8 s64 elements of a masked result against their expected
 * values.
 *
 * For element i the expected value comes from vexpected when bit i of mask
 * is set, and from vexpected_orig otherwise.  On the first mismatch an error
 * is printed, n_errs is incremented, and the comparison stops.
 *
 * vgot           - result under test
 * vexpected      - values expected where the mask bit is set
 * vexpected_orig - values expected where the mask bit is clear
 * mask           - one bit per element; bit i selects element i
 * banner         - label for the error message; may be NULL
 */
void NOINLINE check_equal64(void *vgot, void *vexpected, void *vexpected_orig,
                            int mask, char *banner) {
  int i;
  V512 *got = (V512 *)vgot;
  V512 *expected = (V512 *)vexpected;
  V512 *orig = (V512 *)vexpected_orig;
  for (i = 0; i < 8; i++) {
    __int64 ans = (mask & (1 << i)) ? expected->s64[i] : orig->s64[i];
    if (got->s64[i] != ans) {
      /*
       * %016 rather than %0.16: the 0 flag is ignored when a precision is
       * given.  PRIx64 expects a uint64_t argument, hence the casts.
       */
      printf("ERROR: %s failed -- %016" PRIx64 " != %016" PRIx64
             " at element [%d]\n",
             banner ? banner : "", (uint64_t)got->s64[i], (uint64_t)ans, i);
      n_errs++;
      break; /* report only the first mismatch */
    }
  }
}
/*
 * Exercise the masked moves on 32-bit elements (_mm512_mask_mov_epi32,
 * _mm512_maskz_mov_epi32, _mm512_mask_mov_ps, _mm512_maskz_mov_ps) and
 * verify every result with check_equal32.
 */
void NOINLINE do_mask_mov_32() {
  /*
   * Declared as __mmask16 (one bit per 32-bit element) rather than int, in
   * line with the __mmask8 used by do_mask_mov_64; this avoids an implicit
   * narrowing conversion at each intrinsic call.
   */
  __mmask16 k = 0xf2f3;
  __m512i zeroi = _mm512_setzero_epi32();

  /* Integer variants. */
  set_nonzero(&i1, -97);
  set_nonzero(&i2, 22);
  set_nonzero(&i3, 22);
  /* mask_mov: expect i2 where the mask bit is set, i1 elsewhere. */
  i3 = _mm512_mask_mov_epi32(i1, k, i2);
  check_equal32(&i3, &i2, &i1, k, "_mm512_mask_mov_epi32");
  /* maskz_mov: expect i1 where the mask bit is set, zero elsewhere. */
  i4 = _mm512_maskz_mov_epi32(k, i1);
  check_equal32(&i4, &i1, &zeroi, k, "_mm512_maskz_mov_epi32");

  /* Single-precision variants, compared bitwise as 32-bit integers. */
  set_nonzero(&f1, -96);
  set_nonzero(&f2, 21);
  set_nonzero(&f3, 1400);
  f3 = _mm512_mask_mov_ps(f1, k, f2);
  check_equal32(&f3, &f2, &f1, k, "_mm512_mask_mov_ps");
  f4 = _mm512_maskz_mov_ps(k, f1);
  check_equal32(&f4, &f1, &zeroi, k, "_mm512_maskz_mov_ps");
}
/*
 * Run the masked moves on 64-bit elements (_mm512_mask_mov_epi64,
 * _mm512_maskz_mov_epi64, _mm512_mask_mov_pd, _mm512_maskz_mov_pd) and
 * validate each result through check_equal64.
 */
void NOINLINE do_mask_mov_64() {
  __m512i all_zero = _mm512_setzero_epi32();
  __mmask8 lanes = 0x59;

  /* Integer variants. */
  set_nonzero(&i1, -97);
  set_nonzero(&i2, 22);
  set_nonzero(&i3, 22);
  /* Merge form: selected elements come from i2, the others from i1. */
  i3 = _mm512_mask_mov_epi64(i1, lanes, i2);
  check_equal64(&i3, &i2, &i1, lanes, "_mm512_mask_mov_epi64");
  /* Zeroing form: selected elements come from i1, the others are zero. */
  i4 = _mm512_maskz_mov_epi64(lanes, i1);
  check_equal64(&i4, &i1, &all_zero, lanes, "_mm512_maskz_mov_epi64");

  /* Double-precision variants, compared bitwise as 64-bit integers. */
  set_nonzero(&d1, -96);
  set_nonzero(&d2, 21);
  set_nonzero(&d3, 1400);
  d3 = _mm512_mask_mov_pd(d1, lanes, d2);
  check_equal64(&d3, &d2, &d1, lanes, "_mm512_mask_mov_pd");
  d4 = _mm512_maskz_mov_pd(lanes, d1);
  check_equal64(&d4, &d1, &all_zero, lanes, "_mm512_maskz_mov_pd");
}
/*
 * Run both test groups, then print PASSED and return 0 when no errors were
 * counted in n_errs, or print FAILED and return 1 otherwise.
 */
int main() {
  do_mask_mov_32();
  do_mask_mov_64();

  if (n_errs == 0) {
    printf("PASSED\n");
    return 0;
  }

  printf("FAILED\n");
  return 1;
}