| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86 |
| ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64 |
| |
; Both lanes are overwritten with the constant 0, so each freeze sees a fully
; known value and folds away; llc materializes the zero vector with one vxorps.
define <2 x i64> @freeze_insert_vector_elt(<2 x i64> %a0) {
; CHECK-LABEL: freeze_insert_vector_elt:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %idx0 = insertelement <2 x i64> %a0, i64 0, i64 0
  %freeze0 = freeze <2 x i64> %idx0
  %idx1 = insertelement <2 x i64> %freeze0, i64 0, i64 1
  %freeze1 = freeze <2 x i64> %idx1
  ret <2 x i64> %freeze1
}
| |
; The extracted upper half (elements 4-7) comes entirely from the zero operand
; of the first shuffle, so the freeze folds and the result is all-zero.
define <4 x i32> @freeze_insert_subvector(<8 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_insert_subvector:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  %y = freeze <8 x i32> %x
  %z = shufflevector <8 x i32> %y, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x i32> %z
}
| |
; A freeze between two sign-extension stages (i8->i32 then i32->i64, each with
; a low-subvector extract) must not block combining them into one vpmovsxbq.
define <2 x i64> @freeze_sign_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_sign_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = sext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}
| |
; Zero-extension counterpart of the test above: the two zext-in-reg stages with
; a freeze in between should still fuse into a single vpmovzxbq.
define <2 x i64> @freeze_zero_extend_vector_inreg(<16 x i8> %a0) nounwind {
; CHECK-LABEL: freeze_zero_extend_vector_inreg:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %x = zext <16 x i8> %a0 to <16 x i32>
  %y = shufflevector <16 x i32> %x, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %z = freeze <4 x i32> %y
  %w = zext <4 x i32> %z to <4 x i64>
  %r = shufflevector <4 x i64> %w, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %r
}
| |
; A reversing shuffle, a freeze, then the inverse shuffle: the pair should
; cancel through the freeze, leaving no instructions before the return.
define <4 x i32> @freeze_pshufd(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_pshufd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x i32> %a0, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x i32> %x
  %z = shufflevector <4 x i32> %y, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %z
}
| |
; Floating-point variant of freeze_pshufd: inverse vpermilps-style shuffles
; around a freeze should fold to a no-op.
define <4 x float> @freeze_permilps(<4 x float> %a0) nounwind {
; CHECK-LABEL: freeze_permilps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = shufflevector <4 x float> %a0, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %y = freeze <4 x float> %x
  %z = shufflevector <4 x float> %y, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %z
}
| |
; Freeze of a bitcast from wider (<4 x i16>) to narrower (<8 x i8>) elements:
; the whole chain should collapse into a plain 64-bit load + store.
define void @freeze_bitcast_from_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
; Same as above, but the pre-freeze value also escapes through %escape; the
; freeze should still fold so one loaded value feeds both stores.
define void @freeze_bitcast_from_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_from_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_from_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <4 x i16>, ptr %origin
  %i1 = bitcast <4 x i16> %i0 to <8 x i8>
  store <8 x i8> %i1, ptr %escape
  %i2 = freeze <8 x i8> %i1
  %i3 = bitcast <8 x i8> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
| |
; Freeze of a bitcast from narrower (<8 x i8>) to wider (<4 x i16>) elements:
; again the chain should collapse into a plain 64-bit load + store.
define void @freeze_bitcast_to_wider_elt(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
; Escape variant of freeze_bitcast_to_wider_elt: the unfrozen bitcast is
; stored to %escape as well, and both stores should reuse the single load.
define void @freeze_bitcast_to_wider_elt_escape(ptr %origin, ptr %escape, ptr %dst) nounwind {
; X86-LABEL: freeze_bitcast_to_wider_elt_escape:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-NEXT:    vmovsd %xmm0, (%ecx)
; X86-NEXT:    vmovsd %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitcast_to_wider_elt_escape:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    movq %rax, (%rsi)
; X64-NEXT:    movq %rax, (%rdx)
; X64-NEXT:    retq
  %i0 = load <8 x i8>, ptr %origin
  %i1 = bitcast <8 x i8> %i0 to <4 x i16>
  store <4 x i16> %i1, ptr %escape
  %i2 = freeze <4 x i16> %i1
  %i3 = bitcast <4 x i16> %i2 to i64
  store i64 %i3, ptr %dst
  ret void
}
| |
; Freeze of an 'and' of two loaded vectors, then a constant-index extract:
; the freeze folds and llc extracts byte 6 directly with vpextrb.
define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwind {
; X86-LABEL: freeze_extractelement:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    vmovdqa (%edx), %xmm0
; X86-NEXT:    vpand (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpextrb $6, %xmm0, %ecx
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpextrb $6, %xmm0, %eax
; X64-NEXT:    movb %al, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}
; As freeze_extractelement, but the frozen vector itself escapes via a store;
; the stored vector and the extracted byte must come from the same value.
define void @freeze_extractelement_escape(ptr %origin0, ptr %origin1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_escape:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovdqa (%esi), %xmm0
; X86-NEXT:    vpand (%edx), %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpextrb $6, %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_escape:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0
; X64-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    vpextrb $6, %xmm0, (%rdx)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  store <16 x i8> %i3, ptr %escape
  %i4 = extractelement <16 x i8> %i3, i64 6
  store i8 %i4, ptr %dst
  ret void
}
| |
| ; It would be a miscompilation to pull freeze out of extractelement here. |
; Two variable-index extracts of the SAME frozen vector must observe the same
; frozen bits, so the value is spilled once and both bytes are read from that
; stack copy; sinking the freeze into each extract would be a miscompile.
define void @freeze_extractelement_extra_use(ptr %origin0, ptr %origin1, i64 %idx0, i64 %idx1, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_extractelement_extra_use:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    movl 24(%ebp), %eax
; X86-NEXT:    andl $15, %eax
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movl 32(%ebp), %edx
; X86-NEXT:    movl 12(%ebp), %esi
; X86-NEXT:    movl 8(%ebp), %edi
; X86-NEXT:    vmovaps (%edi), %xmm0
; X86-NEXT:    vandps (%esi), %xmm0, %xmm0
; X86-NEXT:    vmovaps %xmm0, (%esp)
; X86-NEXT:    movzbl (%esp,%ecx), %ecx
; X86-NEXT:    cmpb (%esp,%eax), %cl
; X86-NEXT:    sete (%edx)
; X86-NEXT:    leal -8(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: freeze_extractelement_extra_use:
; X64:       # %bb.0:
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    andl $15, %edx
; X64-NEXT:    vmovaps (%rdi), %xmm0
; X64-NEXT:    vandps (%rsi), %xmm0, %xmm0
; X64-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    movzbl -24(%rsp,%rdx), %eax
; X64-NEXT:    cmpb -24(%rsp,%rcx), %al
; X64-NEXT:    sete (%r8)
; X64-NEXT:    retq
  %i0 = load <16 x i8>, ptr %origin0
  %i1 = load <16 x i8>, ptr %origin1
  %i2 = and <16 x i8> %i0, %i1
  %i3 = freeze <16 x i8> %i2
  %i4 = extractelement <16 x i8> %i3, i64 %idx0
  %i5 = extractelement <16 x i8> %i3, i64 %idx1
  %i6 = icmp eq i8 %i4, %i5
  store i1 %i6, ptr %dst
  ret void
  ; NOTE(review): %escape is unused in this function body — presumably kept to
  ; mirror sibling test signatures; confirm before removing.
}
| |
; Build vector where only lane 0 is maybe-poison (the other lanes are the
; constant 42). Note the source-level 'and 15' on %i0 folds away: the final
; 'and 7' subsumes it (7 & 15 == 7), so llc inserts the raw loaded value.
define void @freeze_buildvector_single_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 42, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}
| |
; The single maybe-poison operand %i0 appears in two lanes (0 and 2); after
; freezing, both lanes must hold the SAME value, hence the broadcast/shuffle
; that duplicates lane 0 rather than two independent inserts.
define void @freeze_buildvector_single_repeated_maybe_poison_operand(ptr %origin, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%ecx), %ecx
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    vbroadcastss {{.*#+}} xmm0 = [42,42,42,42]
; X86-NEXT:    vpinsrd $0, %ecx, %xmm0, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_single_repeated_maybe_poison_operand:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [42,42,42,42]
; X64-NEXT:    vpinsrd $0, (%rdi), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rsi)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin
  %i0 = and i32 %i0.src, 15
  %i1 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 42, i64 1
  %i3 = insertelement <4 x i32> %i2, i32 %i0, i64 2
  %i4 = insertelement <4 x i32> %i3, i32 42, i64 3
  %i5 = freeze <4 x i32> %i4
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst
  ret void
}
| |
; Two frozen build vectors sharing the same maybe-poison scalar in different
; lanes (1 and 2); the scalar is materialized once and reused for both.
define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_frozen_buildvectors:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpinsrd $1, %edx, %xmm0, %xmm0
; X86-NEXT:    vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vmovd %edx, %xmm0
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_frozen_buildvectors:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  ; NOTE(review): masks %i0.src, leaving %i1.src dead (only one load appears in
  ; the X64 checks) — possibly a typo for %i1.src; fixing it would require
  ; regenerating the autogenerated assertions, so it is flagged, not changed.
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  %i7 = freeze <4 x i32> %i6
  store <4 x i32> %i7, ptr %dst1
  ret void
}
| |
; Like freeze_two_frozen_buildvectors, but only the first build vector is
; frozen; the second is stored unfrozen, so its poison lanes stay unmasked.
define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_only_one_frozen:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_only_one_frozen:
; X64:       # %bb.0:
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    andl $15, %eax
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; X64-NEXT:    vmovd %eax, %xmm1
; X64-NEXT:    vpbroadcastd %xmm1, %xmm1
; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vpand %xmm2, %xmm1, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i0 = and i32 %i0.src, 15
  %i1.src = load i32, ptr %origin1
  ; NOTE(review): masks %i0.src, leaving %i1.src dead — possibly intended
  ; %i1.src; left untouched because the checks above are autogenerated from
  ; this exact IR.
  %i1 = and i32 %i0.src, 15
  %i2 = insertelement <4 x i32> poison, i32 %i0, i64 1
  %i3 = and <4 x i32> %i2, <i32 7, i32 7, i32 7, i32 7>
  %i4 = freeze <4 x i32> %i3
  store <4 x i32> %i4, ptr %dst0
  %i5 = insertelement <4 x i32> poison, i32 %i1, i64 2
  %i6 = and <4 x i32> %i5, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i6, ptr %dst1
  ret void
}
| |
; i64 variant: two single-element build vectors (lanes 0 and 1 respectively)
; built from the same masked scalar, only the first of which is frozen.
define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, ptr %dst0, ptr %dst1) nounwind {
; X86-LABEL: freeze_two_buildvectors_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl (%edx), %edx
; X86-NEXT:    andl $15, %edx
; X86-NEXT:    vmovddup {{.*#+}} xmm0 = [7,0,7,0]
; X86-NEXT:    # xmm0 = mem[0,0]
; X86-NEXT:    vmovd %edx, %xmm1
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm2
; X86-NEXT:    vmovdqa %xmm2, (%ecx)
; X86-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_two_buildvectors_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    vmovd %eax, %xmm0
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%rdx)
; X64-NEXT:    vmovdqa %xmm0, (%rcx)
; X64-NEXT:    retq
  %i0.src = load i64, ptr %origin0
  %i0 = and i64 %i0.src, 15
  %i1.src = load i64, ptr %origin1
  ; NOTE(review): masks %i0.src, leaving %i1.src dead (X64 emits one load) —
  ; possibly intended %i1.src; left as-is to keep the autogenerated checks valid.
  %i1 = and i64 %i0.src, 15
  %i2 = insertelement <2 x i64> poison, i64 %i0, i64 0
  %i3 = and <2 x i64> %i2, <i64 7, i64 7>
  %i4 = freeze <2 x i64> %i3
  store <2 x i64> %i4, ptr %dst0
  %i5 = insertelement <2 x i64> poison, i64 %i1, i64 1
  %i6 = and <2 x i64> %i5, <i64 7, i64 7>
  store <2 x i64> %i6, ptr %dst1
  ret void
}
| |
; Build vector with all four lanes maybe-poison (each loaded and masked); the
; per-scalar 'and 15' folds into the vector 'and 7', so the loads are inserted
; directly and masked once.
define void @freeze_buildvector(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector:
; X86:       # %bb.0:
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}
| |
; As freeze_buildvector but lane 2 is never written (stays poison). After the
; freeze it may be any value: the 'vpinsrd $2, %eax' in the checks inserts
; whatever happens to be in eax — that arbitrary register IS the expected,
; autogenerated output for the poison lane.
define void @freeze_buildvector_one_undef_elt(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst) nounwind {
; X86-LABEL: freeze_buildvector_one_undef_elt:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%ecx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_one_undef_elt:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i7 = insertelement <4 x i32> %i5, i32 %i3, i64 3
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
  ; NOTE(review): %origin2 is deliberately unloaded — lane 2 is the poison
  ; element under test.
}
| |
; The unfrozen build vector escapes through %escape, so the per-scalar
; 'and 15' masking cannot be folded away: the checks show a [15,...] mask for
; the escaping store and then the [7,...] mask for the frozen result.
define void @freeze_buildvector_extrause(ptr %origin0, ptr %origin1, ptr %origin2, ptr %origin3, ptr %dst, ptr %escape) nounwind {
; X86-LABEL: freeze_buildvector_extrause:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-NEXT:    vpinsrd $1, (%edi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $2, (%esi), %xmm0, %xmm0
; X86-NEXT:    vpinsrd $3, (%edx), %xmm0, %xmm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%ecx)
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-NEXT:    vmovdqa %xmm0, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: freeze_buildvector_extrause:
; X64:       # %bb.0:
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $1, (%rsi), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $2, (%rdx), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rcx), %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r9)
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa %xmm0, (%r8)
; X64-NEXT:    retq
  %i0.src = load i32, ptr %origin0
  %i1.src = load i32, ptr %origin1
  %i2.src = load i32, ptr %origin2
  %i3.src = load i32, ptr %origin3
  %i0 = and i32 %i0.src, 15
  %i1 = and i32 %i1.src, 15
  %i2 = and i32 %i2.src, 15
  %i3 = and i32 %i3.src, 15
  %i4 = insertelement <4 x i32> poison, i32 %i0, i64 0
  %i5 = insertelement <4 x i32> %i4, i32 %i1, i64 1
  %i6 = insertelement <4 x i32> %i5, i32 %i2, i64 2
  %i7 = insertelement <4 x i32> %i6, i32 %i3, i64 3
  store <4 x i32> %i7, ptr %escape
  %i8 = freeze <4 x i32> %i7
  %i9 = and <4 x i32> %i8, <i32 7, i32 7, i32 7, i32 7>
  store <4 x i32> %i9, ptr %dst
  ret void
}
| |
; Regression test for PR59677: a build vector whose lanes 2-3 come from
; zeroinitializer, shifted, converted to float, and fed into llvm.sin; only
; element 0 of the result is kept, so only a scalar sinf call is emitted.
define void @pr59677(i32 %x, ptr %out) nounwind {
; X86-LABEL: pr59677:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    pushl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    vmovd %eax, %xmm0
; X86-NEXT:    orl $1, %eax
; X86-NEXT:    vmovd %eax, %xmm1
; X86-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X86-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X86-NEXT:    vmovss %xmm0, (%esp)
; X86-NEXT:    calll sinf
; X86-NEXT:    fstps (%esi)
; X86-NEXT:    addl $4, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pr59677:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    movq %rsi, %rbx
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    orl $1, %edi
; X64-NEXT:    vmovd %edi, %xmm1
; X64-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; X64-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm0
; X64-NEXT:    callq sinf@PLT
; X64-NEXT:    vmovss %xmm0, (%rbx)
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
  %i0 = or i32 %x, 1
  %i1 = insertelement <4 x i32> zeroinitializer, i32 %x, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 %i0, i64 1
  %i3 = shl <4 x i32> %i2, <i32 1, i32 1, i32 1, i32 1>
  %i4 = sitofp <4 x i32> %i3 to <4 x float>
  %i5 = tail call <4 x float> @llvm.sin.v4f32(<4 x float> %i4)
  %i6 = extractelement <4 x float> %i5, i64 0
  store float %i6, ptr %out, align 4
  ret void
}
| declare <4 x float> @llvm.sin.v4f32(<4 x float>) |