; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512VL

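; Check lowering of selects between a loaded wide integer (i128/i256/i512) and
; the bitwise NOT or negation of a second loaded wide integer.
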
;
; LOGIC
;

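; Select between a loaded i128 value and the bitwise NOT of a second loaded i128 value.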
define void @test_not_i128(ptr %p0, ptr %p1, i1 zeroext %a2, ptr %p3) nounwind {
; SSE2-LABEL: test_not_i128:
; SSE2: # %bb.0:
; SSE2-NEXT: negl %edx
; SSE2-NEXT: movd %edx, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa (%rdi), %xmm1
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: por (%rsi), %xmm0
; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm0, %xmm2
; SSE2-NEXT: por %xmm1, %xmm2
; SSE2-NEXT: movdqa %xmm2, (%rcx)
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_not_i128:
; SSE4: # %bb.0:
; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
; SSE4-NEXT: pxor (%rsi), %xmm1
; SSE4-NEXT: negl %edx
; SSE4-NEXT: movd %edx, %xmm0
; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE4-NEXT: blendvps %xmm0, (%rdi), %xmm1
; SSE4-NEXT: movaps %xmm1, (%rcx)
; SSE4-NEXT: retq
;
; AVX1-LABEL: test_not_i128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVX1-NEXT: negl %edx
; AVX1-NEXT: vmovd %edx, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vblendvps %xmm1, (%rdi), %xmm0, %xmm0
; AVX1-NEXT: vmovaps %xmm0, (%rcx)
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_not_i128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVX2-NEXT: negl %edx
; AVX2-NEXT: vmovd %edx, %xmm1
; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
; AVX2-NEXT: vblendvps %xmm1, (%rdi), %xmm0, %xmm0
; AVX2-NEXT: vmovaps %xmm0, (%rcx)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_not_i128:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; AVX512F-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpxor (%rsi), %xmm1, %xmm1
; AVX512F-NEXT: negb %dl
; AVX512F-NEXT: kmovw %edx, %k1
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqa %xmm1, (%rcx)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_not_i128:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor (%rsi), %xmm0, %xmm0
; AVX512VL-NEXT: negb %dl
; AVX512VL-NEXT: kmovd %edx, %k1
; AVX512VL-NEXT: vmovdqa32 (%rdi), %xmm0 {%k1}
; AVX512VL-NEXT: vmovdqa %xmm0, (%rcx)
; AVX512VL-NEXT: retq
  %ld0 = load i128, ptr %p0
  %ld1 = load i128, ptr %p1
  %neg1 = xor i128 %ld1, -1
  %sel = select i1 %a2, i128 %ld0, i128 %neg1
  store i128 %sel, ptr %p3
  ret void
}

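; Select between a loaded i256 value and the bitwise NOT of a second loaded i256 value.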
define void @test_not_i256(ptr %p0, ptr %p1, i1 zeroext %a2, ptr %p3) nounwind {
; SSE2-LABEL: test_not_i256:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rsi), %xmm0
; SSE2-NEXT: movdqa 16(%rsi), %xmm1
; SSE2-NEXT: negl %edx
; SSE2-NEXT: movd %edx, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT: movdqa (%rdi), %xmm3
; SSE2-NEXT: pand %xmm2, %xmm3
; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
; SSE2-NEXT: pxor %xmm2, %xmm4
; SSE2-NEXT: pandn %xmm4, %xmm0
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: pand 16(%rdi), %xmm2
; SSE2-NEXT: pandn %xmm4, %xmm1
; SSE2-NEXT: por %xmm2, %xmm1
; SSE2-NEXT: movdqa %xmm1, 16(%rcx)
; SSE2-NEXT: movdqa %xmm0, (%rcx)
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_not_i256:
; SSE4: # %bb.0:
; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
; SSE4-NEXT: movdqa 16(%rsi), %xmm2
; SSE4-NEXT: pxor %xmm1, %xmm2
; SSE4-NEXT: pxor (%rsi), %xmm1
; SSE4-NEXT: negl %edx
; SSE4-NEXT: movd %edx, %xmm0
; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE4-NEXT: blendvps %xmm0, (%rdi), %xmm1
; SSE4-NEXT: blendvps %xmm0, 16(%rdi), %xmm2
; SSE4-NEXT: movaps %xmm2, 16(%rcx)
; SSE4-NEXT: movaps %xmm1, (%rcx)
; SSE4-NEXT: retq
;
; AVX1-LABEL: test_not_i256:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps (%rdi), %xmm0
; AVX1-NEXT: vmovaps (%rsi), %xmm1
; AVX1-NEXT: vinsertf128 $1, 16(%rsi), %ymm1, %ymm1
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vcmptrueps %ymm2, %ymm2, %ymm2
; AVX1-NEXT: vxorps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: negl %edx
; AVX1-NEXT: vmovd %edx, %xmm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
; AVX1-NEXT: vblendvps %xmm2, 16(%rdi), %xmm3, %xmm3
; AVX1-NEXT: vmovaps %xmm3, 16(%rcx)
; AVX1-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovaps %xmm0, (%rcx)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_not_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
; AVX2-NEXT: negl %edx
; AVX2-NEXT: vmovd %edx, %xmm1
; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT: vblendvps %ymm1, (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vmovups %ymm0, (%rcx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_not_i256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqu (%rdi), %ymm0
; AVX512F-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512F-NEXT: vpxor (%rsi), %ymm1, %ymm1
; AVX512F-NEXT: negb %dl
; AVX512F-NEXT: kmovw %edx, %k1
; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqu %ymm1, (%rcx)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_not_i256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vpxor (%rsi), %ymm0, %ymm0
; AVX512VL-NEXT: negb %dl
; AVX512VL-NEXT: kmovd %edx, %k1
; AVX512VL-NEXT: vmovdqu32 (%rdi), %ymm0 {%k1}
; AVX512VL-NEXT: vmovdqu %ymm0, (%rcx)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %ld0 = load i256, ptr %p0
  %ld1 = load i256, ptr %p1
  %neg1 = xor i256 %ld1, -1
  %sel = select i1 %a2, i256 %ld0, i256 %neg1
  store i256 %sel, ptr %p3
  ret void
}

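; Select between a loaded i512 value and the bitwise NOT of a second loaded i512 value.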
define void @test_not_i512(ptr %p0, ptr %p1, i1 zeroext %a2, ptr %p3) nounwind {
; SSE2-LABEL: test_not_i512:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rsi), %xmm3
; SSE2-NEXT: movdqa 16(%rsi), %xmm2
; SSE2-NEXT: movdqa 32(%rsi), %xmm1
; SSE2-NEXT: movdqa 48(%rsi), %xmm0
; SSE2-NEXT: negl %edx
; SSE2-NEXT: movd %edx, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
; SSE2-NEXT: movdqa 48(%rdi), %xmm5
; SSE2-NEXT: pand %xmm4, %xmm5
; SSE2-NEXT: pcmpeqd %xmm6, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: pandn %xmm6, %xmm0
; SSE2-NEXT: por %xmm5, %xmm0
; SSE2-NEXT: movdqa 32(%rdi), %xmm5
; SSE2-NEXT: pand %xmm4, %xmm5
; SSE2-NEXT: pandn %xmm6, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: movdqa 16(%rdi), %xmm5
; SSE2-NEXT: pand %xmm4, %xmm5
; SSE2-NEXT: pandn %xmm6, %xmm2
; SSE2-NEXT: por %xmm5, %xmm2
; SSE2-NEXT: pand (%rdi), %xmm4
; SSE2-NEXT: pandn %xmm6, %xmm3
; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: movdqa %xmm3, (%rcx)
; SSE2-NEXT: movdqa %xmm2, 16(%rcx)
; SSE2-NEXT: movdqa %xmm1, 32(%rcx)
; SSE2-NEXT: movdqa %xmm0, 48(%rcx)
; SSE2-NEXT: retq
;
; SSE4-LABEL: test_not_i512:
; SSE4: # %bb.0:
; SSE4-NEXT: pcmpeqd %xmm1, %xmm1
; SSE4-NEXT: movdqa (%rsi), %xmm2
; SSE4-NEXT: pxor %xmm1, %xmm2
; SSE4-NEXT: movdqa 16(%rsi), %xmm3
; SSE4-NEXT: pxor %xmm1, %xmm3
; SSE4-NEXT: movdqa 32(%rsi), %xmm4
; SSE4-NEXT: pxor %xmm1, %xmm4
; SSE4-NEXT: pxor 48(%rsi), %xmm1
; SSE4-NEXT: negl %edx
; SSE4-NEXT: movd %edx, %xmm0
; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE4-NEXT: blendvps %xmm0, 48(%rdi), %xmm1
; SSE4-NEXT: blendvps %xmm0, 32(%rdi), %xmm4
; SSE4-NEXT: blendvps %xmm0, 16(%rdi), %xmm3
; SSE4-NEXT: blendvps %xmm0, (%rdi), %xmm2
; SSE4-NEXT: movaps %xmm2, (%rcx)
; SSE4-NEXT: movaps %xmm3, 16(%rcx)
; SSE4-NEXT: movaps %xmm4, 32(%rcx)
; SSE4-NEXT: movaps %xmm1, 48(%rcx)
; SSE4-NEXT: retq
;
; AVX1-LABEL: test_not_i512:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps (%rdi), %xmm0
; AVX1-NEXT: vmovaps (%rsi), %xmm1
; AVX1-NEXT: vmovaps 32(%rsi), %xmm2
; AVX1-NEXT: vinsertf128 $1, 48(%rsi), %ymm2, %ymm2
; AVX1-NEXT: vinsertf128 $1, 16(%rsi), %ymm1, %ymm1
; AVX1-NEXT: vmovaps 16(%rdi), %xmm3
; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX1-NEXT: vcmptrueps %ymm4, %ymm4, %ymm4
; AVX1-NEXT: vxorps %ymm4, %ymm1, %ymm1
; AVX1-NEXT: vxorps %ymm4, %ymm2, %ymm2
; AVX1-NEXT: negl %edx
; AVX1-NEXT: vmovd %edx, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[0,0,0,0]
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
; AVX1-NEXT: vblendvps %xmm4, 48(%rdi), %xmm5, %xmm5
; AVX1-NEXT: vmovaps 32(%rdi), %xmm6
; AVX1-NEXT: vmovaps %xmm5, 48(%rcx)
; AVX1-NEXT: vblendvps %xmm4, %xmm6, %xmm2, %xmm2
; AVX1-NEXT: vmovaps %xmm2, 32(%rcx)
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vblendvps %xmm4, %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vmovaps %xmm2, 16(%rcx)
; AVX1-NEXT: vblendvps %xmm4, %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmovaps %xmm0, (%rcx)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test_not_i512:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX2-NEXT: vpxor 32(%rsi), %ymm0, %ymm1
; AVX2-NEXT: vpxor (%rsi), %ymm0, %ymm0
; AVX2-NEXT: negl %edx
; AVX2-NEXT: vmovd %edx, %xmm2
; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2
; AVX2-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vblendvps %ymm2, 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vmovups %ymm1, 32(%rcx)
; AVX2-NEXT: vmovups %ymm0, (%rcx)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: test_not_i512:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512F-NEXT: vpxord (%rsi), %zmm0, %zmm0
; AVX512F-NEXT: negl %edx
; AVX512F-NEXT: kmovw %edx, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1}
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rcx)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_not_i512:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm0 = -1
; AVX512VL-NEXT: vpxord (%rsi), %zmm0, %zmm0
; AVX512VL-NEXT: negl %edx
; AVX512VL-NEXT: kmovd %edx, %k1
; AVX512VL-NEXT: vmovdqu32 (%rdi), %zmm0 {%k1}
; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rcx)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %ld0 = load i512, ptr %p0
  %ld1 = load i512, ptr %p1
  %neg1 = xor i512 %ld1, -1
  %sel = select i1 %a2, i512 %ld0, i512 %neg1
  store i512 %sel, ptr %p3
  ret void
}

;
; ADD/SUB
;

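; Select between a loaded i512 value and the negation (sub i512 0, %x) of a second loaded i512 value.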
define void @test_neg_i512(ptr %p0, ptr %p1, i1 zeroext %a2, ptr %p3) nounwind {
; SSE-LABEL: test_neg_i512:
; SSE: # %bb.0:
; SSE-NEXT: pushq %r15
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: xorl %r8d, %r8d
; SSE-NEXT: xorl %r10d, %r10d
; SSE-NEXT: subq (%rsi), %r10
; SSE-NEXT: movl $0, %eax
; SSE-NEXT: sbbq 8(%rsi), %rax
; SSE-NEXT: movl $0, %ebx
; SSE-NEXT: sbbq 16(%rsi), %rbx
; SSE-NEXT: movl $0, %r9d
; SSE-NEXT: sbbq 24(%rsi), %r9
; SSE-NEXT: movl $0, %r14d
; SSE-NEXT: sbbq 32(%rsi), %r14
; SSE-NEXT: movl $0, %r11d
; SSE-NEXT: sbbq 40(%rsi), %r11
; SSE-NEXT: movl $0, %r15d
; SSE-NEXT: sbbq 48(%rsi), %r15
; SSE-NEXT: sbbq 56(%rsi), %r8
; SSE-NEXT: testl %edx, %edx
; SSE-NEXT: je .LBB3_2
; SSE-NEXT: # %bb.1:
; SSE-NEXT: movq 8(%rdi), %rax
; SSE-NEXT: movq 24(%rdi), %r9
; SSE-NEXT: movq 40(%rdi), %r11
; SSE-NEXT: movq 56(%rdi), %r8
; SSE-NEXT: movq (%rdi), %r10
; SSE-NEXT: movq 16(%rdi), %rbx
; SSE-NEXT: movq 32(%rdi), %r14
; SSE-NEXT: movq 48(%rdi), %r15
; SSE-NEXT: .LBB3_2:
; SSE-NEXT: movq %r15, 48(%rcx)
; SSE-NEXT: movq %r14, 32(%rcx)
; SSE-NEXT: movq %rbx, 16(%rcx)
; SSE-NEXT: movq %r10, (%rcx)
; SSE-NEXT: movq %r8, 56(%rcx)
; SSE-NEXT: movq %r11, 40(%rcx)
; SSE-NEXT: movq %r9, 24(%rcx)
; SSE-NEXT: movq %rax, 8(%rcx)
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: popq %r15
; SSE-NEXT: retq
;
; AVX-LABEL: test_neg_i512:
; AVX: # %bb.0:
; AVX-NEXT: pushq %r15
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %rbx
; AVX-NEXT: xorl %r8d, %r8d
; AVX-NEXT: xorl %r10d, %r10d
; AVX-NEXT: subq (%rsi), %r10
; AVX-NEXT: movl $0, %eax
; AVX-NEXT: sbbq 8(%rsi), %rax
; AVX-NEXT: movl $0, %ebx
; AVX-NEXT: sbbq 16(%rsi), %rbx
; AVX-NEXT: movl $0, %r9d
; AVX-NEXT: sbbq 24(%rsi), %r9
; AVX-NEXT: movl $0, %r14d
; AVX-NEXT: sbbq 32(%rsi), %r14
; AVX-NEXT: movl $0, %r11d
; AVX-NEXT: sbbq 40(%rsi), %r11
; AVX-NEXT: movl $0, %r15d
; AVX-NEXT: sbbq 48(%rsi), %r15
; AVX-NEXT: sbbq 56(%rsi), %r8
; AVX-NEXT: testl %edx, %edx
; AVX-NEXT: je .LBB3_2
; AVX-NEXT: # %bb.1:
; AVX-NEXT: movq 8(%rdi), %rax
; AVX-NEXT: movq 24(%rdi), %r9
; AVX-NEXT: movq 40(%rdi), %r11
; AVX-NEXT: movq 56(%rdi), %r8
; AVX-NEXT: movq (%rdi), %r10
; AVX-NEXT: movq 16(%rdi), %rbx
; AVX-NEXT: movq 32(%rdi), %r14
; AVX-NEXT: movq 48(%rdi), %r15
; AVX-NEXT: .LBB3_2:
; AVX-NEXT: movq %r15, 48(%rcx)
; AVX-NEXT: movq %r14, 32(%rcx)
; AVX-NEXT: movq %rbx, 16(%rcx)
; AVX-NEXT: movq %r10, (%rcx)
; AVX-NEXT: movq %r8, 56(%rcx)
; AVX-NEXT: movq %r11, 40(%rcx)
; AVX-NEXT: movq %r9, 24(%rcx)
; AVX-NEXT: movq %rax, 8(%rcx)
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %r14
; AVX-NEXT: popq %r15
; AVX-NEXT: retq
;
; AVX512F-LABEL: test_neg_i512:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqu64 (%rsi), %zmm0
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %esi
; AVX512F-NEXT: movzbl %sil, %esi
; AVX512F-NEXT: leal (%rsi,%rax,2), %eax
; AVX512F-NEXT: xorl %esi, %eax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpsubq %zmm0, %zmm1, %zmm1
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm2 = -1
; AVX512F-NEXT: vpxorq %zmm2, %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: negl %edx
; AVX512F-NEXT: kmovw %edx, %k1
; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: vmovdqu64 %zmm1, (%rcx)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_neg_i512:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqu64 (%rsi), %zmm0
; AVX512VL-NEXT: vptestmq %zmm0, %zmm0, %k0
; AVX512VL-NEXT: kmovd %k0, %eax
; AVX512VL-NEXT: vptestnmq %zmm0, %zmm0, %k0
; AVX512VL-NEXT: kmovb %k0, %esi
; AVX512VL-NEXT: leal (%rsi,%rax,2), %eax
; AVX512VL-NEXT: xorl %esi, %eax
; AVX512VL-NEXT: kmovd %eax, %k1
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpsubq %zmm0, %zmm1, %zmm1
; AVX512VL-NEXT: vpternlogd {{.*#+}} zmm2 = -1
; AVX512VL-NEXT: vpxorq %zmm2, %zmm0, %zmm1 {%k1}
; AVX512VL-NEXT: negl %edx
; AVX512VL-NEXT: kmovd %edx, %k1
; AVX512VL-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
; AVX512VL-NEXT: vmovdqu64 %zmm1, (%rcx)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
  %ld0 = load i512, ptr %p0
  %ld1 = load i512, ptr %p1
  %neg1 = sub i512 0, %ld1
  %sel = select i1 %a2, i512 %ld0, i512 %neg1
  store i512 %sel, ptr %p3
  ret void
}