| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX512 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE41 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 |
| |
| ; Declarations of the vector llvm.abs intrinsics, each taking the vector operand and an i1 flag. |
| ; The tests visible in this part of the file call the v4i64, v4i32 and v4i8 variants. |
| declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1) |
| declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1) |
| declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1) |
| declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1) |
| |
| ; Test: llvm.abs on <4 x i8> (flag = true) compared for equality against a splat of 129, returned as <4 x i1>. |
| ; The checked output keeps an explicit abs (pabsb / vpabsb, or pxor + psubb + pminub with plain SSE2) |
| ; followed by a single byte compare against a constant-pool operand. |
| ; NOTE(review): 129 does not fit a signed i8; presumably it is taken as the bit pattern 0x81 - confirm. |
| define <4 x i1> @illegal_abs_unchanged(<4 x i8> %x) { |
| ; AVX512-LABEL: illegal_abs_unchanged: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsb %xmm0, %xmm0 |
| ; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: illegal_abs_unchanged: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpabsb %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: illegal_abs_unchanged: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsb %xmm0, %xmm0 |
| ; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: illegal_abs_unchanged: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: pxor %xmm1, %xmm1 |
| ; SSE2-NEXT: psubb %xmm0, %xmm1 |
| ; SSE2-NEXT: pminub %xmm1, %xmm0 |
| ; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
| ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true) |
| %cmp = icmp eq <4 x i8> %abs, <i8 129, i8 129, i8 129, i8 129> |
| ret <4 x i1> %cmp |
| } |
| |
| ; Test: llvm.abs on <4 x i8> (flag = true) compared with icmp ne against a splat of 129, returned as <4 x i1>. |
| ; The checked output is an explicit abs, a byte equality compare against a constant-pool operand, and then |
| ; an extra step on the compare result: vpternlogq $15 with AVX512, or an all-ones register (pcmpeqd of a |
| ; register with itself) xor'ed into the result on the other configurations. |
| ; NOTE(review): 129 does not fit a signed i8; presumably it is taken as the bit pattern 0x81 - confirm. |
| define <4 x i1> @illegal_abs_unchanged2(<4 x i8> %x) { |
| ; AVX512-LABEL: illegal_abs_unchanged2: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsb %xmm0, %xmm0 |
| ; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: illegal_abs_unchanged2: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpabsb %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: illegal_abs_unchanged2: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsb %xmm0, %xmm0 |
| ; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; SSE41-NEXT: pxor %xmm0, %xmm1 |
| ; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: illegal_abs_unchanged2: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: pxor %xmm1, %xmm1 |
| ; SSE2-NEXT: psubb %xmm0, %xmm1 |
| ; SSE2-NEXT: pminub %xmm1, %xmm0 |
| ; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
| ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %x, i1 true) |
| %cmp = icmp ne <4 x i8> %abs, <i8 129, i8 129, i8 129, i8 129> |
| ret <4 x i1> %cmp |
| } |
| |
| ; Test: llvm.abs on <4 x i64> (flag = true) compared for equality against a splat of 129, returned as <4 x i1>. |
| ; Checked output per configuration: |
| ;  - AVX512 uses vpabsq and compares into mask register k1, then materialises the mask as a vector. |
| ;  - AVX2 and SSE4.1 build abs from a subtract-from-zero plus a blend, then compare with [129,...]. |
| ;  - SSE2 builds abs from psrad $31 / pxor / psubq and emulates the 64-bit compare with pcmpeqd + pshufd + pand. |
| ; NOTE(review): the name suggests a rewrite into two compares joined by 'or', but the checks shown here |
| ; still compute abs and compare against 129 only - confirm this is the intended baseline. |
| define <4 x i1> @illegal_abs_to_eq_or(<4 x i64> %x) { |
| ; AVX512-LABEL: illegal_abs_to_eq_or: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsq %ymm0, %ymm0 |
| ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 |
| ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: illegal_abs_to_eq_or: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 |
| ; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: illegal_abs_to_eq_or: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa %xmm0, %xmm2 |
| ; SSE41-NEXT: pxor %xmm3, %xmm3 |
| ; SSE41-NEXT: pxor %xmm4, %xmm4 |
| ; SSE41-NEXT: psubq %xmm0, %xmm4 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 |
| ; SSE41-NEXT: psubq %xmm1, %xmm3 |
| ; SSE41-NEXT: movdqa %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 |
| ; SSE41-NEXT: packssdw %xmm1, %xmm2 |
| ; SSE41-NEXT: movdqa %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: illegal_abs_to_eq_or: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm0 |
| ; SSE2-NEXT: psubq %xmm2, %xmm0 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm1 |
| ; SSE2-NEXT: psubq %xmm2, %xmm1 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2] |
| ; SSE2-NEXT: pand %xmm1, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] |
| ; SSE2-NEXT: pand %xmm1, %xmm0 |
| ; SSE2-NEXT: packssdw %xmm3, %xmm0 |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true) |
| %cmp = icmp eq <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129> |
| ret <4 x i1> %cmp |
| } |
| |
| ; Test: llvm.abs on <4 x i64> (flag = true) compared for equality against a splat of 129, with the |
| ; <4 x i1> result sign-extended back to <4 x i64>. |
| ; Because the result stays 64 bits per lane, the checked output ends at the compare: there is no |
| ; mask-register step with AVX512 and no packssdw narrowing on the other configurations. |
| ; abs is built with vpabsq (AVX512), subtract + blend (AVX2, SSE4.1) or psrad/pxor/psubq (SSE2). |
| define <4 x i64> @illegal_abs_to_eq_or_sext(<4 x i64> %x) { |
| ; AVX512-LABEL: illegal_abs_to_eq_or_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsq %ymm0, %ymm0 |
| ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: illegal_abs_to_eq_or_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 |
| ; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: illegal_abs_to_eq_or_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa %xmm0, %xmm2 |
| ; SSE41-NEXT: pxor %xmm3, %xmm3 |
| ; SSE41-NEXT: pxor %xmm4, %xmm4 |
| ; SSE41-NEXT: psubq %xmm1, %xmm4 |
| ; SSE41-NEXT: movdqa %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 |
| ; SSE41-NEXT: psubq %xmm2, %xmm3 |
| ; SSE41-NEXT: movdqa %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 |
| ; SSE41-NEXT: movdqa %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: illegal_abs_to_eq_or_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm1 |
| ; SSE2-NEXT: psubq %xmm2, %xmm1 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm0 |
| ; SSE2-NEXT: psubq %xmm2, %xmm0 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2] |
| ; SSE2-NEXT: pand %xmm3, %xmm0 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] |
| ; SSE2-NEXT: pand %xmm2, %xmm1 |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true) |
| %cmp = icmp eq <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129> |
| %r = sext <4 x i1> %cmp to <4 x i64> |
| ret <4 x i64> %r |
| } |
| |
| ; Test: llvm.abs on <4 x i64> (flag = true) compared with icmp ne against a splat of 129, returned as <4 x i1>. |
| ; Checked output per configuration: |
| ;  - AVX512 uses vpabsq and a not-equal compare (vpcmpneqq) into mask register k1. |
| ;  - AVX2, SSE4.1 and SSE2 compute abs, do an equality compare with [129,...], then xor the result with |
| ;    an all-ones register (pcmpeqd of a register with itself) before narrowing with packssdw. |
| ; NOTE(review): the name suggests a rewrite into two compares joined by 'and', but the checks shown here |
| ; still compute abs and compare against 129 only - confirm this is the intended baseline. |
| define <4 x i1> @illegal_abs_to_ne_and(<4 x i64> %x) { |
| ; AVX512-LABEL: illegal_abs_to_ne_and: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsq %ymm0, %ymm0 |
| ; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 |
| ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: illegal_abs_to_ne_and: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 |
| ; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 |
| ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: illegal_abs_to_ne_and: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa %xmm0, %xmm2 |
| ; SSE41-NEXT: pxor %xmm3, %xmm3 |
| ; SSE41-NEXT: pxor %xmm4, %xmm4 |
| ; SSE41-NEXT: psubq %xmm0, %xmm4 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm2 |
| ; SSE41-NEXT: psubq %xmm1, %xmm3 |
| ; SSE41-NEXT: movdqa %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm1 |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 |
| ; SSE41-NEXT: pxor %xmm3, %xmm1 |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 |
| ; SSE41-NEXT: pxor %xmm3, %xmm2 |
| ; SSE41-NEXT: packssdw %xmm1, %xmm2 |
| ; SSE41-NEXT: movdqa %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: illegal_abs_to_ne_and: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm0 |
| ; SSE2-NEXT: psubq %xmm2, %xmm0 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm1 |
| ; SSE2-NEXT: psubq %xmm2, %xmm1 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2] |
| ; SSE2-NEXT: pand %xmm1, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; SSE2-NEXT: pxor %xmm1, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2] |
| ; SSE2-NEXT: pand %xmm2, %xmm0 |
| ; SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; SSE2-NEXT: packssdw %xmm3, %xmm0 |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true) |
| %cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129> |
| ret <4 x i1> %cmp |
| } |
| |
| ; Test: llvm.abs on <4 x i64> (flag = true) compared with icmp ne against a splat of 129, with the |
| ; <4 x i1> result sign-extended back to <4 x i64>. |
| ; The checked output computes abs, does an equality compare with [129,...], and then applies one more |
| ; step to the compare result: vpternlogq $15 with AVX512, or an xor with an all-ones register |
| ; (pcmpeqd of a register with itself) on the other configurations. No narrowing is emitted. |
| define <4 x i64> @illegal_abs_to_ne_and_sext(<4 x i64> %x) { |
| ; AVX512-LABEL: illegal_abs_to_ne_and_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsq %ymm0, %ymm0 |
| ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 |
| ; AVX512-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: illegal_abs_to_ne_and_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpsubq %ymm0, %ymm1, %ymm1 |
| ; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 |
| ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: illegal_abs_to_ne_and_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa %xmm0, %xmm2 |
| ; SSE41-NEXT: pxor %xmm3, %xmm3 |
| ; SSE41-NEXT: pxor %xmm4, %xmm4 |
| ; SSE41-NEXT: psubq %xmm1, %xmm4 |
| ; SSE41-NEXT: movdqa %xmm1, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm4, %xmm1 |
| ; SSE41-NEXT: psubq %xmm2, %xmm3 |
| ; SSE41-NEXT: movdqa %xmm2, %xmm0 |
| ; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm2 |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [129,129] |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 |
| ; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 |
| ; SSE41-NEXT: pxor %xmm3, %xmm2 |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm1 |
| ; SSE41-NEXT: pxor %xmm3, %xmm1 |
| ; SSE41-NEXT: movdqa %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: illegal_abs_to_ne_and_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm1 |
| ; SSE2-NEXT: psubq %xmm2, %xmm1 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm0 |
| ; SSE2-NEXT: psubq %xmm2, %xmm0 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,0,3,2] |
| ; SSE2-NEXT: pand %xmm3, %xmm0 |
| ; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 |
| ; SSE2-NEXT: pxor %xmm3, %xmm0 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm1 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2] |
| ; SSE2-NEXT: pand %xmm2, %xmm1 |
| ; SSE2-NEXT: pxor %xmm3, %xmm1 |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %x, i1 true) |
| %cmp = icmp ne <4 x i64> %abs, <i64 129, i64 129, i64 129, i64 129> |
| %r = sext <4 x i1> %cmp to <4 x i64> |
| ret <4 x i64> %r |
| } |
| |
| ; Test: llvm.abs on <4 x i32> (flag = true) compared for equality against a splat of 129, returned as <4 x i1>. |
| ; The checked output keeps abs followed by one pcmpeqd: vpabsd / pabsd where available, or |
| ; psrad $31 + pxor + psubd with plain SSE2. |
| define <4 x i1> @legal_abs_eq_unchanged(<4 x i32> %x) { |
| ; AVX512-LABEL: legal_abs_eq_unchanged: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] |
| ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: legal_abs_eq_unchanged: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: legal_abs_eq_unchanged: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsd %xmm0, %xmm0 |
| ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: legal_abs_eq_unchanged: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; SSE2-NEXT: psrad $31, %xmm1 |
| ; SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; SSE2-NEXT: psubd %xmm1, %xmm0 |
| ; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) |
| %cmp = icmp eq <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129> |
| ret <4 x i1> %cmp |
| } |
| |
| ; Test: llvm.abs on <4 x i32> (flag = true) compared for equality against a splat of 129, with the |
| ; <4 x i1> result sign-extended to <4 x i32>. |
| ; The checked output is abs followed by one pcmpeqd; no separate extension instruction is expected. |
| ; abs is vpabsd / pabsd where available, or psrad $31 + pxor + psubd with plain SSE2. |
| define <4 x i32> @legal_abs_eq_unchanged_sext(<4 x i32> %x) { |
| ; AVX512-LABEL: legal_abs_eq_unchanged_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] |
| ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: legal_abs_eq_unchanged_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: legal_abs_eq_unchanged_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsd %xmm0, %xmm0 |
| ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: legal_abs_eq_unchanged_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; SSE2-NEXT: psrad $31, %xmm1 |
| ; SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; SSE2-NEXT: psubd %xmm1, %xmm0 |
| ; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) |
| %cmp = icmp eq <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129> |
| %r = sext <4 x i1> %cmp to <4 x i32> |
| ret <4 x i32> %r |
| } |
| |
| ; Test: llvm.abs on <4 x i32> (flag = true) compared with icmp ne against a splat of 129, returned as <4 x i1>. |
| ; The checked output is abs, an equality pcmpeqd, and then one more step on the compare result: |
| ; vpternlogq $15 with AVX512, or an xor with an all-ones register (pcmpeqd of a register with itself). |
| ; NOTE(review): "unchangedd" looks like a typo of "unchanged"; renaming it would also require |
| ; regenerating the LABEL checks below. |
| define <4 x i1> @legal_abs_ne_unchangedd(<4 x i32> %x) { |
| ; AVX512-LABEL: legal_abs_ne_unchangedd: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] |
| ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: legal_abs_ne_unchangedd: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: legal_abs_ne_unchangedd: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsd %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 |
| ; SSE41-NEXT: pxor %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: legal_abs_ne_unchangedd: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; SSE2-NEXT: psrad $31, %xmm1 |
| ; SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; SSE2-NEXT: psubd %xmm1, %xmm0 |
| ; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) |
| %cmp = icmp ne <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129> |
| ret <4 x i1> %cmp |
| } |
| |
| ; Test: llvm.abs on <4 x i32> (flag = true) compared with icmp ne against a splat of 129, with the |
| ; <4 x i1> result sign-extended to <4 x i32>. |
| ; The checked output is abs, an equality pcmpeqd, and then vpternlogq $15 (AVX512) or an xor with an |
| ; all-ones register (other configurations); no separate extension instruction is expected. |
| ; NOTE(review): "unchangedd" looks like a typo of "unchanged"; renaming it would also require |
| ; regenerating the LABEL checks below. |
| define <4 x i32> @legal_abs_ne_unchangedd_sext(<4 x i32> %x) { |
| ; AVX512-LABEL: legal_abs_ne_unchangedd_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] |
| ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: legal_abs_ne_unchangedd_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: legal_abs_ne_unchangedd_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsd %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 |
| ; SSE41-NEXT: pxor %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: legal_abs_ne_unchangedd_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; SSE2-NEXT: psrad $31, %xmm1 |
| ; SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; SSE2-NEXT: psubd %xmm1, %xmm0 |
| ; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; SSE2-NEXT: retq |
| %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %x, i1 true) |
| %cmp = icmp ne <4 x i32> %abs, <i32 129, i32 129, i32 129, i32 129> |
| %r = sext <4 x i1> %cmp to <4 x i32> |
| ret <4 x i32> %r |
| } |
| |
| ; Test: (x == 129) | (x == -129) on <4 x i64>, written as two icmp eq joined by 'or', returned as <4 x i1>. |
| ; Checked output per configuration: |
| ;  - AVX512 folds the pair into vpabsq plus a single vpcmpeqq into mask register k1. |
| ;  - AVX2, SSE4.1 and SSE2 keep two compares, one against 129 and one against 18446744073709551487 |
| ;    (the unsigned 64-bit spelling of -129), and combine them with an 'or'. |
| ;  - SSE2 emulates each 64-bit compare with pcmpeqd plus shufps/andps of the even and odd dwords. |
| define <4 x i1> @eq_or_to_abs_vec4x64(<4 x i64> %x) { |
| ; AVX512-LABEL: eq_or_to_abs_vec4x64: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsq %ymm0, %ymm0 |
| ; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 |
| ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: eq_or_to_abs_vec4x64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] |
| ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: eq_or_to_abs_vec4x64: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE41-NEXT: movdqa %xmm1, %xmm3 |
| ; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 |
| ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] |
| ; SSE41-NEXT: pcmpeqq %xmm3, %xmm1 |
| ; SSE41-NEXT: pcmpeqq %xmm3, %xmm0 |
| ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] |
| ; SSE41-NEXT: orps %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: eq_or_to_abs_vec4x64: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE2-NEXT: movdqa %xmm1, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 |
| ; SSE2-NEXT: movdqa %xmm2, %xmm4 |
| ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3] |
| ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] |
| ; SSE2-NEXT: andps %xmm4, %xmm2 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] |
| ; SSE2-NEXT: pcmpeqd %xmm3, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 |
| ; SSE2-NEXT: movdqa %xmm0, %xmm3 |
| ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3] |
| ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] |
| ; SSE2-NEXT: andps %xmm3, %xmm0 |
| ; SSE2-NEXT: orps %xmm2, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129> |
| %cmp2 = icmp eq <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129> |
| %cmp = or <4 x i1> %cmp1, %cmp2 |
| ret <4 x i1> %cmp |
| } |
| |
| ; Test: (x == 129) | (x == -129) on <4 x i64>, with the <4 x i1> result sign-extended to <4 x i64>. |
| ; Checked output per configuration: |
| ;  - AVX512 folds the pair into vpabsq plus a single vpcmpeqq against [129,...]. |
| ;  - AVX2 keeps two vpcmpeqq (against 129 and 18446744073709551487, the unsigned 64-bit spelling |
| ;    of -129) joined by vpor. |
| ;  - SSE4.1 and SSE2 also keep two compares and an 'or', narrow the result to four dwords, and then |
| ;    widen it again (pmovsxdq or pcmpgtd/punpckldq for the low half, psllq $63 + psrad $31 for the high half). |
| define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) { |
| ; AVX512-LABEL: eq_or_to_abs_vec4x64_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX512-NEXT: vpabsq %ymm0, %ymm0 |
| ; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: eq_or_to_abs_vec4x64_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] |
| ; AVX2-NEXT: vpcmpeqq %ymm2, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: eq_or_to_abs_vec4x64_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE41-NEXT: movdqa %xmm1, %xmm3 |
| ; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 |
| ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] |
| ; SSE41-NEXT: pcmpeqq %xmm3, %xmm1 |
| ; SSE41-NEXT: pcmpeqq %xmm3, %xmm0 |
| ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] |
| ; SSE41-NEXT: orps %xmm2, %xmm0 |
| ; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 |
| ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3] |
| ; SSE41-NEXT: psllq $63, %xmm0 |
| ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] |
| ; SSE41-NEXT: psrad $31, %xmm1 |
| ; SSE41-NEXT: movdqa %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: eq_or_to_abs_vec4x64_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE2-NEXT: movdqa %xmm1, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 |
| ; SSE2-NEXT: movdqa %xmm2, %xmm4 |
| ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3] |
| ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] |
| ; SSE2-NEXT: andps %xmm4, %xmm2 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551487,18446744073709551487] |
| ; SSE2-NEXT: pcmpeqd %xmm3, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm3, %xmm0 |
| ; SSE2-NEXT: movdqa %xmm0, %xmm3 |
| ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,3],xmm1[1,3] |
| ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] |
| ; SSE2-NEXT: andps %xmm3, %xmm0 |
| ; SSE2-NEXT: orps %xmm2, %xmm0 |
| ; SSE2-NEXT: pxor %xmm1, %xmm1 |
| ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3] |
| ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; SSE2-NEXT: psllq $63, %xmm2 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm1 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129> |
| %cmp2 = icmp eq <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129> |
| %cmp = or <4 x i1> %cmp1, %cmp2 |
| %r = sext <4 x i1> %cmp to <4 x i64> |
| ret <4 x i64> %r |
| } |
| |
| ; Test: (x != 129) & (x != -129) on <4 x i64>, written as two icmp ne joined by 'and', returned as <4 x i1>. |
| ; Checked output per configuration: |
| ;  - AVX512 folds the pair into vpabsq plus a single vpcmpneqq into mask register k1. |
| ;  - AVX2, SSE4.1 and SSE2 keep two equality compares (against 129 and 18446744073709551487, the |
| ;    unsigned 64-bit spelling of -129); one result is xor'ed with an all-ones register and the two |
| ;    are then combined with an and-not (vpandn / andnps). |
| define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) { |
| ; AVX512-LABEL: ne_and_to_abs_vec4x64: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpabsq %ymm0, %ymm0 |
| ; AVX512-NEXT: vpcmpneqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 |
| ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: ne_and_to_abs_vec4x64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 |
| ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] |
| ; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 |
| ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: ne_and_to_abs_vec4x64: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE41-NEXT: movdqa %xmm1, %xmm3 |
| ; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 |
| ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] |
| ; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] |
| ; SSE41-NEXT: pcmpeqq %xmm4, %xmm1 |
| ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 |
| ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] |
| ; SSE41-NEXT: xorps %xmm3, %xmm0 |
| ; SSE41-NEXT: andnps %xmm0, %xmm2 |
| ; SSE41-NEXT: movaps %xmm2, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: ne_and_to_abs_vec4x64: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE2-NEXT: movdqa %xmm1, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm0, %xmm2 |
| ; SSE2-NEXT: movdqa %xmm2, %xmm4 |
| ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3] |
| ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] |
| ; SSE2-NEXT: andps %xmm4, %xmm2 |
| ; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] |
| ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm4, %xmm0 |
| ; SSE2-NEXT: movdqa %xmm0, %xmm4 |
| ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3] |
| ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] |
| ; SSE2-NEXT: andps %xmm4, %xmm0 |
| ; SSE2-NEXT: xorps %xmm3, %xmm0 |
| ; SSE2-NEXT: andnps %xmm0, %xmm2 |
| ; SSE2-NEXT: movaps %xmm2, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129> |
| %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129> |
| %cmp = and <4 x i1> %cmp1, %cmp2 |
| ret <4 x i1> %cmp |
| } |
| |
| ; Test: (x != 129) & (x != -129) on <4 x i64>, with the <4 x i1> result sign-extended to <4 x i64>. |
| ; Checked output per configuration: |
| ;  - AVX512 folds the pair into vpabsq, one vpcmpeqq against [129,...] and a vpternlogq $15 on the result. |
| ;  - AVX2 keeps two vpcmpeqq (against 129 and 18446744073709551487, the unsigned 64-bit spelling |
| ;    of -129), an xor with all-ones and a vpandn. |
| ;  - SSE4.1 and SSE2 do the same on 128-bit halves, narrow the result to four dwords, and then widen |
| ;    it again (pmovsxdq or pcmpgtd/punpckldq for the low half, psllq $63 + psrad $31 for the high half). |
| define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) { |
| ; AVX512-LABEL: ne_and_to_abs_vec4x64_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX512-NEXT: vpabsq %ymm0, %ymm0 |
| ; AVX512-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 |
| ; AVX512-NEXT: vpternlogq $15, %ymm0, %ymm0, %ymm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: ne_and_to_abs_vec4x64_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm1 = [129,129,129,129] |
| ; AVX2-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm1 |
| ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2 |
| ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487] |
| ; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0 |
| ; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: ne_and_to_abs_vec4x64_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [129,129] |
| ; SSE41-NEXT: movdqa %xmm1, %xmm3 |
| ; SSE41-NEXT: pcmpeqq %xmm2, %xmm3 |
| ; SSE41-NEXT: pcmpeqq %xmm0, %xmm2 |
| ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] |
| ; SSE41-NEXT: pcmpeqd %xmm3, %xmm3 |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] |
| ; SSE41-NEXT: pcmpeqq %xmm4, %xmm1 |
| ; SSE41-NEXT: pcmpeqq %xmm4, %xmm0 |
| ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] |
| ; SSE41-NEXT: xorps %xmm3, %xmm0 |
| ; SSE41-NEXT: andnps %xmm0, %xmm2 |
| ; SSE41-NEXT: pmovsxdq %xmm2, %xmm0 |
| ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3] |
| ; SSE41-NEXT: psllq $63, %xmm1 |
| ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] |
| ; SSE41-NEXT: psrad $31, %xmm1 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: ne_and_to_abs_vec4x64_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa %xmm0, %xmm2 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [129,129] |
| ; SSE2-NEXT: movdqa %xmm1, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm0, %xmm3 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: movdqa %xmm0, %xmm4 |
| ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3] |
| ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2] |
| ; SSE2-NEXT: andps %xmm4, %xmm0 |
| ; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487] |
| ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm4, %xmm2 |
| ; SSE2-NEXT: movdqa %xmm2, %xmm4 |
| ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3] |
| ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] |
| ; SSE2-NEXT: andps %xmm4, %xmm2 |
| ; SSE2-NEXT: xorps %xmm3, %xmm2 |
| ; SSE2-NEXT: andnps %xmm2, %xmm0 |
| ; SSE2-NEXT: pxor %xmm1, %xmm1 |
| ; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3] |
| ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] |
| ; SSE2-NEXT: psllq $63, %xmm2 |
| ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] |
| ; SSE2-NEXT: psrad $31, %xmm1 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129> |
| %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129> |
| %cmp = and <4 x i1> %cmp1, %cmp2 |
| %r = sext <4 x i1> %cmp to <4 x i64> |
| ret <4 x i64> %r |
| } |
| |
| ; Test: (x == 1) | (x == -1) on <4 x i32>, written as two icmp eq joined by 'or', returned as <4 x i1>. |
| ; Checked output per configuration: |
| ;  - AVX512, AVX2 and SSE4.1 fold the pair into an abs (vpabsd / pabsd) plus a single pcmpeqd against 1. |
| ;  - SSE2 keeps two pcmpeqd, one against [1,1,1,1] and one against an all-ones register (pcmpeqd of a |
| ;    register with itself), joined by por. |
| define <4 x i1> @eq_or_to_abs_vec4x32(<4 x i32> %x) { |
| ; AVX512-LABEL: eq_or_to_abs_vec4x32: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; AVX512-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: eq_or_to_abs_vec4x32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; AVX2-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: eq_or_to_abs_vec4x32: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsd %xmm0, %xmm0 |
| ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: eq_or_to_abs_vec4x32: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] |
| ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: por %xmm1, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp eq <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> |
| %cmp2 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %cmp = or <4 x i1> %cmp1, %cmp2 |
| ret <4 x i1> %cmp |
| } |
| |
| ; Same (x == 1) | (x == -1) pattern on 4 x i32, but the 4 x i1 result is |
| ; sign-extended back to 4 x i32 before being returned. The checked |
| ; instruction sequences match those of eq_or_to_abs_vec4x32 in every run |
| ; (abs + one compare where pabsd is available, two compares + por with |
| ; plain SSE2), i.e. the checks contain no separate extension instruction. |
| define <4 x i32> @eq_or_to_abs_vec4x32_sext(<4 x i32> %x) { |
| ; AVX512-LABEL: eq_or_to_abs_vec4x32_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; AVX512-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: eq_or_to_abs_vec4x32_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; AVX2-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: eq_or_to_abs_vec4x32_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsd %xmm0, %xmm0 |
| ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: eq_or_to_abs_vec4x32_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] |
| ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: por %xmm1, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp eq <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> |
| %cmp2 = icmp eq <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %cmp = or <4 x i1> %cmp1, %cmp2 |
| %r = sext <4 x i1> %cmp to <4 x i32> |
| ret <4 x i32> %r |
| } |
| |
| ; Splat compare (x != 1) & (x != -1) on a 4 x i32 vector, returned as 4 x i1. |
| ; With AVX-512, AVX2 and SSE4.1 the checks expect abs (pabsd) plus one |
| ; pcmpeqd against 1, and then an inversion of that equality mask: an xor |
| ; with an all-ones register on AVX2 / SSE4.1, and vpternlogq $15 on AVX-512 |
| ; (presumably the bitwise-NOT truth table; confirm against the ISA manual). |
| ; With only SSE2 the checks keep both equality compares, invert one with |
| ; pxor and fold the other inversion into pandn. |
| define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) { |
| ; AVX512-LABEL: ne_and_to_abs_vec4x32: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; AVX512-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: ne_and_to_abs_vec4x32: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; AVX2-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: ne_and_to_abs_vec4x32: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsd %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 |
| ; SSE41-NEXT: pxor %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: ne_and_to_abs_vec4x32: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] |
| ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: pxor %xmm2, %xmm0 |
| ; SSE2-NEXT: pandn %xmm0, %xmm1 |
| ; SSE2-NEXT: movdqa %xmm1, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> |
| %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %cmp = and <4 x i1> %cmp1, %cmp2 |
| ret <4 x i1> %cmp |
| } |
| |
| ; Same (x != 1) & (x != -1) pattern on 4 x i32, with the 4 x i1 result |
| ; sign-extended to 4 x i32 before returning. The checked sequences match |
| ; those of ne_and_to_abs_vec4x32 in every run: abs + compare + mask |
| ; inversion where pabsd is available, and two compares combined through |
| ; pxor / pandn with plain SSE2. No separate extension instruction is checked. |
| define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) { |
| ; AVX512-LABEL: ne_and_to_abs_vec4x32_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; AVX512-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: ne_and_to_abs_vec4x32_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; AVX2-NEXT: vpabsd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: ne_and_to_abs_vec4x32_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: pabsd %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 |
| ; SSE41-NEXT: pxor %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: ne_and_to_abs_vec4x32_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,1,1,1] |
| ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0 |
| ; SSE2-NEXT: pxor %xmm2, %xmm0 |
| ; SSE2-NEXT: pandn %xmm0, %xmm1 |
| ; SSE2-NEXT: movdqa %xmm1, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp ne <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1> |
| %cmp2 = icmp ne <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %cmp = and <4 x i1> %cmp1, %cmp2 |
| %r = sext <4 x i1> %cmp to <4 x i32> |
| ret <4 x i32> %r |
| } |
| |
| ; Splat compare (x == 88) | (x == -88) on a 4 x i16 vector, returned as |
| ; 4 x i1. Unlike the 4 x i32 tests in this file, none of the checked |
| ; sequences contains an abs instruction: every run performs two pcmpeqw |
| ; compares, widens each result to 32-bit lanes and ORs them (korw on mask |
| ; registers with AVX-512, vpor / por otherwise). |
| ; NOTE(review): presumably this records that the abs fold is not applied |
| ; to this narrower vector type - confirm intent with the originating patch. |
| define <4 x i1> @eq_or_to_abs_vec4x16(<4 x i16> %x) { |
| ; AVX512-LABEL: eq_or_to_abs_vec4x16: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 |
| ; AVX512-NEXT: vpmovsxwd %xmm1, %ymm1 |
| ; AVX512-NEXT: vptestmd %ymm1, %ymm1, %k0 |
| ; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 |
| ; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1 |
| ; AVX512-NEXT: korw %k1, %k0, %k1 |
| ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: eq_or_to_abs_vec4x16: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 |
| ; AVX2-NEXT: vpmovsxwd %xmm1, %xmm1 |
| ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vpmovsxwd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: eq_or_to_abs_vec4x16: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> |
| ; SSE41-NEXT: pcmpeqw %xmm0, %xmm1 |
| ; SSE41-NEXT: pmovsxwd %xmm1, %xmm1 |
| ; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 |
| ; SSE41-NEXT: por %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: eq_or_to_abs_vec4x16: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> |
| ; SSE2-NEXT: pcmpeqw %xmm0, %xmm1 |
| ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] |
| ; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] |
| ; SSE2-NEXT: por %xmm1, %xmm0 |
| ; SSE2-NEXT: psrad $16, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp eq <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88> |
| %cmp2 = icmp eq <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88> |
| %cmp = or <4 x i1> %cmp1, %cmp2 |
| ret <4 x i1> %cmp |
| } |
| |
| ; Splat compare (x == 88) | (x == -88) on a 4 x i8 vector, with the 4 x i1 |
| ; result sign-extended to 4 x i8. No abs instruction appears in any run's |
| ; checks. AVX2, SSE4.1 and plain SSE2 expect two pcmpeqb compares joined by |
| ; an OR. The AVX-512 checks instead widen each compare to a zmm register, |
| ; turn it into a mask with vptestmd, OR the masks with korw, and narrow the |
| ; masked all-ones vector back to bytes with vpmovdb. |
| define <4 x i8> @eq_or_to_abs_vec4x8_sext(<4 x i8> %x) { |
| ; AVX512-LABEL: eq_or_to_abs_vec4x8_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 |
| ; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1 |
| ; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0 |
| ; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1 |
| ; AVX512-NEXT: korw %k1, %k0, %k1 |
| ; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} |
| ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: eq_or_to_abs_vec4x8_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 |
| ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: eq_or_to_abs_vec4x8_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> |
| ; SSE41-NEXT: pcmpeqb %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: por %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: eq_or_to_abs_vec4x8_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> |
| ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 |
| ; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: por %xmm1, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp eq <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88> |
| %cmp2 = icmp eq <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88> |
| %cmp = or <4 x i1> %cmp1, %cmp2 |
| %r = sext <4 x i1> %cmp to <4 x i8> |
| ret <4 x i8> %r |
| } |
| |
| ; Splat compare (x != 88) & (x != -88) on a 4 x i8 vector, returned as |
| ; 4 x i1. No abs instruction appears in any run's checks: each run does two |
| ; pcmpeqb compares, inverts each result individually (xor with an all-ones |
| ; register, or vpternlogq $15 on AVX-512), widens both to 32-bit lanes and |
| ; ANDs them (kandw on mask registers with AVX-512, vpand / pand otherwise). |
| define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) { |
| ; AVX512-LABEL: ne_and_to_abs_vec4x8: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 |
| ; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1 |
| ; AVX512-NEXT: vpmovsxbd %xmm1, %zmm1 |
| ; AVX512-NEXT: vptestmd %zmm1, %zmm1, %k0 |
| ; AVX512-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 |
| ; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1 |
| ; AVX512-NEXT: kandw %k1, %k0, %k1 |
| ; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: ne_and_to_abs_vec4x8: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 |
| ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 |
| ; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1 |
| ; AVX2-NEXT: vpmovsxbd %xmm1, %xmm1 |
| ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 |
| ; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0 |
| ; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: ne_and_to_abs_vec4x8: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> |
| ; SSE41-NEXT: pcmpeqb %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; SSE41-NEXT: pxor %xmm2, %xmm1 |
| ; SSE41-NEXT: pmovsxbd %xmm1, %xmm1 |
| ; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: pxor %xmm2, %xmm0 |
| ; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 |
| ; SSE41-NEXT: pand %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: ne_and_to_abs_vec4x8: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u> |
| ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; SSE2-NEXT: pxor %xmm2, %xmm1 |
| ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
| ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] |
| ; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: pxor %xmm2, %xmm0 |
| ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] |
| ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] |
| ; SSE2-NEXT: pand %xmm1, %xmm0 |
| ; SSE2-NEXT: psrad $24, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp ne <4 x i8> %x, <i8 88, i8 88, i8 88, i8 88> |
| %cmp2 = icmp ne <4 x i8> %x, <i8 -88, i8 -88, i8 -88, i8 -88> |
| %cmp = and <4 x i1> %cmp1, %cmp2 |
| ret <4 x i1> %cmp |
| } |
| |
| ; Splat compare (x != 88) & (x != -88) on a 4 x i16 vector, with the 4 x i1 |
| ; result sign-extended to 4 x i16. No abs instruction appears in any run's |
| ; checks. AVX2, SSE4.1 and plain SSE2 expect two pcmpeqw compares where one |
| ; result is inverted with an xor against all-ones and the other inversion is |
| ; folded into pandn. The AVX-512 checks invert each compare with |
| ; vpternlogq $15, go through mask registers (vptestmd + kandw), and narrow |
| ; the masked all-ones vector back to words with vpmovdw. |
| define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) { |
| ; AVX512-LABEL: ne_and_to_abs_vec4x16_sext: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 |
| ; AVX512-NEXT: vpternlogq $15, %xmm1, %xmm1, %xmm1 |
| ; AVX512-NEXT: vpmovsxwd %xmm1, %ymm1 |
| ; AVX512-NEXT: vptestmd %ymm1, %ymm1, %k0 |
| ; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 |
| ; AVX512-NEXT: vptestmd %ymm0, %ymm0, %k1 |
| ; AVX512-NEXT: kandw %k1, %k0, %k1 |
| ; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX512-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z} |
| ; AVX512-NEXT: vpmovdw %ymm0, %xmm0 |
| ; AVX512-NEXT: vzeroupper |
| ; AVX512-NEXT: retq |
| ; |
| ; AVX2-LABEL: ne_and_to_abs_vec4x16_sext: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 |
| ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 |
| ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0 |
| ; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0 |
| ; AVX2-NEXT: retq |
| ; |
| ; SSE41-LABEL: ne_and_to_abs_vec4x16_sext: |
| ; SSE41: # %bb.0: |
| ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> |
| ; SSE41-NEXT: pcmpeqw %xmm0, %xmm1 |
| ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE41-NEXT: pxor %xmm2, %xmm0 |
| ; SSE41-NEXT: pandn %xmm0, %xmm1 |
| ; SSE41-NEXT: movdqa %xmm1, %xmm0 |
| ; SSE41-NEXT: retq |
| ; |
| ; SSE2-LABEL: ne_and_to_abs_vec4x16_sext: |
| ; SSE2: # %bb.0: |
| ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = <88,88,88,88,u,u,u,u> |
| ; SSE2-NEXT: pcmpeqw %xmm0, %xmm1 |
| ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 |
| ; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; SSE2-NEXT: pxor %xmm2, %xmm0 |
| ; SSE2-NEXT: pandn %xmm0, %xmm1 |
| ; SSE2-NEXT: movdqa %xmm1, %xmm0 |
| ; SSE2-NEXT: retq |
| %cmp1 = icmp ne <4 x i16> %x, <i16 88, i16 88, i16 88, i16 88> |
| %cmp2 = icmp ne <4 x i16> %x, <i16 -88, i16 -88, i16 -88, i16 -88> |
| %cmp = and <4 x i1> %cmp1, %cmp2 |
| %r = sext <4 x i1> %cmp to <4 x i16> |
| ret <4 x i16> %r |
| } |