| /* APPLE LOCAL file 5612787 mainline sse4 */ |
| /* { dg-do run { target i?86-*-* x86_64-*-* } } */ |
| /* { dg-require-effective-target sse4a } */ |
| /* { dg-options "-O2 -msse4a" } */ |
| #include <ammintrin.h> |
| #include <stdlib.h> |
| #include "../../gcc.dg/i386-cpuid.h" |
| |
/* Forward declaration: main calls sse4a_test, whose definition comes
   after the helper functions and test data later in this file.  */
static void sse4a_test (void);
| |
| typedef union |
| { |
| long long i[2]; |
| __m128i vec; |
| } LI; |
| |
| int |
| main () |
| { |
| unsigned long cpu_facilities; |
| |
| cpu_facilities = i386_extended_cpuid_ecx (); |
| |
| /* Run SSE4a test only if host has SSE4a support. */ |
| if ((cpu_facilities & bit_SSE4a)) |
| sse4a_test (); |
| |
| exit (0); |
| } |
| |
| static long long |
| sse4a_test_insert (long long in1, long long in2) |
| { |
| __m128i v1,v2; |
| long long index_length, pad; |
| LI v_out; |
| index_length = 0x0000000000000810LL; |
| pad = 0x0; |
| v1 = _mm_set_epi64x (pad, in1); |
| v2 = _mm_set_epi64x (index_length, in2); |
| v_out.vec = _mm_insert_si64 (v1, v2); |
| return (v_out.i[0]); |
| } |
| |
| static long long |
| sse4a_test_inserti (long long in1, long long in2) |
| { |
| __m128i v1,v2; |
| long long pad = 0x0; |
| LI v_out; |
| v1 = _mm_set_epi64x (pad, in1); |
| v2 = _mm_set_epi64x (pad, in2); |
| v_out.vec = _mm_inserti_si64 (v1, v2, (unsigned int) 0x10, (unsigned int) 0x08); |
| return (v_out.i[0]); |
| } |
| |
/* Compare an observed value with its expected value.

   I1 and I2 are the two 64-bit values to compare.

   Returns 0 when they are equal and 1 when they differ, so callers can
   sum the results to count failures.

   The return type is spelled out explicitly: the previous definition
   relied on implicit int, which was removed from the language in C99.  */
static int
chk (long long i1, long long i2)
{
  return i1 != i2;
}
| |
/* Test vectors.  Entry N of vals_in1 and vals_in2 is passed as the first
   and second input of each helper, and entry N of vals_out is the result
   both helpers are expected to return.

   Each expected value is the vals_in1 entry with bits 8..23 replaced by
   the low 16 bits of the matching vals_in2 entry.  */

/* First input of each test case.  */
long long vals_in1[5] =
  {
    0x1234567887654321LL,
    0x1456782093002490LL,
    0x2340909123990390LL,
    0x9595959599595999LL,
    0x9099038798000029LL
  };

/* Second input of each test case.  */
long long vals_in2[5] =
  {
    0x9ABCDEF00FEDCBA9LL,
    0x234567097289672ALL,
    0x45476453097BD342LL,
    0x23569012AE586FF0LL,
    0x432567ABCDEF765DLL
  };

/* Expected result of each test case.  */
long long vals_out[5] =
  {
    0x1234567887CBA921LL,
    0x1456782093672A90LL,
    0x2340909123D34290LL,
    0x95959595996FF099LL,
    0x9099038798765D29LL
  };
| |
| static void |
| sse4a_test (void) |
| { |
| int i; |
| int fail = 0; |
| long long out; |
| |
| for (i = 0; i < 5; i += 1) |
| { |
| out = sse4a_test_insert (vals_in1[i], vals_in2[i]); |
| fail += chk(out, vals_out[i]); |
| |
| out = sse4a_test_inserti (vals_in1[i], vals_in2[i]); |
| fail += chk(out, vals_out[i]); |
| } |
| |
| if (fail != 0) |
| abort (); |
| |
| exit (0); |
| } |