| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s -check-prefixes=CHECK,SSE |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s -check-prefixes=CHECK,SSE |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s -check-prefixes=CHECK,AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=knl | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512F |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512VL |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 -mattr=+avx512vbmi2 | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512VBMI |
| |
| define i512 @shl_i512(i512 %a0, i512 %a1) nounwind { |
| ; SSE-LABEL: shl_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 |
| ; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %eax, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %eax |
| ; SSE-NEXT: andl $56, %eax |
| ; SSE-NEXT: negl %eax |
| ; SSE-NEXT: cltq |
| ; SSE-NEXT: movq -56(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -48(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %rsi |
| ; SSE-NEXT: shldq %cl, %rdx, %rsi |
| ; SSE-NEXT: movq -40(%rsp,%rax), %r10 |
| ; SSE-NEXT: movq %r10, %r8 |
| ; SSE-NEXT: shldq %cl, %r9, %r8 |
| ; SSE-NEXT: movq -32(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %r11 |
| ; SSE-NEXT: shldq %cl, %r10, %r11 |
| ; SSE-NEXT: movq -24(%rsp,%rax), %r10 |
| ; SSE-NEXT: movq %r10, %rbx |
| ; SSE-NEXT: shldq %cl, %r9, %rbx |
| ; SSE-NEXT: movq -16(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %r14 |
| ; SSE-NEXT: shldq %cl, %r10, %r14 |
| ; SSE-NEXT: movq -8(%rsp,%rax), %r10 |
| ; SSE-NEXT: shldq %cl, %r9, %r10 |
| ; SSE-NEXT: movq -64(%rsp,%rax), %rax |
| ; SSE-NEXT: movq %rax, %r9 |
| ; SSE-NEXT: shlq %cl, %r9 |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: shldq %cl, %rax, %rdx |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: movq %r10, 56(%rdi) |
| ; SSE-NEXT: movq %r14, 48(%rdi) |
| ; SSE-NEXT: movq %rbx, 40(%rdi) |
| ; SSE-NEXT: movq %r11, 32(%rdi) |
| ; SSE-NEXT: movq %r8, 24(%rdi) |
| ; SSE-NEXT: movq %rsi, 16(%rdi) |
| ; SSE-NEXT: movq %rdx, 8(%rdi) |
| ; SSE-NEXT: movq %r9, (%rdi) |
| ; SSE-NEXT: addq $8, %rsp |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_i512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %eax |
| ; AVX2-NEXT: andl $56, %eax |
| ; AVX2-NEXT: negl %eax |
| ; AVX2-NEXT: movslq %eax, %r8 |
| ; AVX2-NEXT: movq -56(%rsp,%r8), %rdx |
| ; AVX2-NEXT: movq -48(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %rsi |
| ; AVX2-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq -40(%rsp,%r8), %r10 |
| ; AVX2-NEXT: movq %r10, %r9 |
| ; AVX2-NEXT: shldq %cl, %rax, %r9 |
| ; AVX2-NEXT: movq -32(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %r11 |
| ; AVX2-NEXT: shldq %cl, %r10, %r11 |
| ; AVX2-NEXT: movq -24(%rsp,%r8), %r10 |
| ; AVX2-NEXT: movq %r10, %rbx |
| ; AVX2-NEXT: shldq %cl, %rax, %rbx |
| ; AVX2-NEXT: movq -16(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %r14 |
| ; AVX2-NEXT: shldq %cl, %r10, %r14 |
| ; AVX2-NEXT: movq -8(%rsp,%r8), %r10 |
| ; AVX2-NEXT: shldq %cl, %rax, %r10 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: movq -64(%rsp,%r8), %rdi |
| ; AVX2-NEXT: shlxq %rcx, %rdi, %r8 |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shldq %cl, %rdi, %rdx |
| ; AVX2-NEXT: movq %r10, 56(%rax) |
| ; AVX2-NEXT: movq %r14, 48(%rax) |
| ; AVX2-NEXT: movq %rbx, 40(%rax) |
| ; AVX2-NEXT: movq %r11, 32(%rax) |
| ; AVX2-NEXT: movq %r9, 24(%rax) |
| ; AVX2-NEXT: movq %rsi, 16(%rax) |
| ; AVX2-NEXT: movq %rdx, 8(%rax) |
| ; AVX2-NEXT: movq %r8, (%rax) |
| ; AVX2-NEXT: addq $8, %rsp |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: shl_i512: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: pushq %rax |
| ; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX512F-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vmovdqu %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %eax, %ecx |
| ; AVX512F-NEXT: andl $63, %ecx |
| ; AVX512F-NEXT: vmovq %rcx, %xmm1 |
| ; AVX512F-NEXT: vpbroadcastq %xmm1, %xmm1 |
| ; AVX512F-NEXT: shrl $3, %eax |
| ; AVX512F-NEXT: andl $56, %eax |
| ; AVX512F-NEXT: negl %eax |
| ; AVX512F-NEXT: cltq |
| ; AVX512F-NEXT: vmovdqu64 -64(%rsp,%rax), %zmm2 |
| ; AVX512F-NEXT: vpsllq %xmm1, %zmm2, %zmm3 |
| ; AVX512F-NEXT: valignq {{.*#+}} zmm0 = zmm0[7],zmm2[0,1,2,3,4,5,6] |
| ; AVX512F-NEXT: vpsrlq $1, %zmm0, %zmm0 |
| ; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 |
| ; AVX512F-NEXT: vpsrlq %xmm1, %zmm0, %zmm0 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: vporq %zmm0, %zmm3, %zmm0 |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512F-NEXT: popq %rcx |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: shl_i512: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: pushq %rax |
| ; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %eax, %ecx |
| ; AVX512VL-NEXT: andl $63, %ecx |
| ; AVX512VL-NEXT: vpbroadcastq %rcx, %xmm0 |
| ; AVX512VL-NEXT: shrl $3, %eax |
| ; AVX512VL-NEXT: andl $56, %eax |
| ; AVX512VL-NEXT: negl %eax |
| ; AVX512VL-NEXT: cltq |
| ; AVX512VL-NEXT: vmovdqu64 -64(%rsp,%rax), %zmm1 |
| ; AVX512VL-NEXT: vpsllq %xmm0, %zmm1, %zmm2 |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm3[7],zmm1[0,1,2,3,4,5,6] |
| ; AVX512VL-NEXT: vpsrlq $1, %zmm1, %zmm1 |
| ; AVX512VL-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpsrlq %xmm0, %zmm1, %zmm0 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: vporq %zmm0, %zmm2, %zmm0 |
| ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, 32(%rdi) |
| ; AVX512VL-NEXT: vmovdqu %ymm0, (%rdi) |
| ; AVX512VL-NEXT: popq %rcx |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: shl_i512: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: pushq %rax |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi |
| ; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vpbroadcastq %rdi, %zmm0 |
| ; AVX512VBMI-NEXT: movl %edi, %ecx |
| ; AVX512VBMI-NEXT: shrl $3, %ecx |
| ; AVX512VBMI-NEXT: andl $56, %ecx |
| ; AVX512VBMI-NEXT: negl %ecx |
| ; AVX512VBMI-NEXT: movslq %ecx, %rcx |
| ; AVX512VBMI-NEXT: vmovdqu64 -64(%rsp,%rcx), %zmm1 |
| ; AVX512VBMI-NEXT: vpxor %xmm2, %xmm2, %xmm2 |
| ; AVX512VBMI-NEXT: valignq {{.*#+}} zmm2 = zmm2[7],zmm1[0,1,2,3,4,5,6] |
| ; AVX512VBMI-NEXT: vpshldvq %zmm0, %zmm2, %zmm1 |
| ; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm1, 32(%rax) |
| ; AVX512VBMI-NEXT: vmovdqu %ymm1, (%rax) |
| ; AVX512VBMI-NEXT: popq %rcx |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %r = shl i512 %a0, %a1 |
| ret i512 %r |
| } |
| |
| ; Variable-amount i512 logical (zero-filling) right shift. CHECK lines are |
| ; autogenerated — regenerate with utils/update_llc_test_checks.py, do not |
| ; edit them by hand. Scalar targets spill the value below a zero-filled |
| ; block on the stack, index by shift/8 bytes, and merge the eight limbs |
| ; with shrdq (final limb via shrq / shrxq). AVX512 targets use valignq to |
| ; rotate in the zero element plus vpsrlq/vpsllq (vpshrdvq on VBMI2). |
| define i512 @lshr_i512(i512 %a0, i512 %a1) nounwind { |
| ; SSE-LABEL: lshr_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r15 |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 |
| ; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; SSE-NEXT: xorps %xmm1, %xmm1 |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %eax, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %eax |
| ; SSE-NEXT: andl $56, %eax |
| ; SSE-NEXT: movq -112(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -120(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %rsi |
| ; SSE-NEXT: shrdq %cl, %rdx, %rsi |
| ; SSE-NEXT: movq -104(%rsp,%rax), %r8 |
| ; SSE-NEXT: shrdq %cl, %r8, %rdx |
| ; SSE-NEXT: movq -96(%rsp,%rax), %r10 |
| ; SSE-NEXT: shrdq %cl, %r10, %r8 |
| ; SSE-NEXT: movq -88(%rsp,%rax), %r11 |
| ; SSE-NEXT: shrdq %cl, %r11, %r10 |
| ; SSE-NEXT: movq -80(%rsp,%rax), %rbx |
| ; SSE-NEXT: shrdq %cl, %rbx, %r11 |
| ; SSE-NEXT: movq -72(%rsp,%rax), %r14 |
| ; SSE-NEXT: shrdq %cl, %r14, %rbx |
| ; SSE-NEXT: movq -128(%rsp,%rax), %r15 |
| ; SSE-NEXT: shrdq %cl, %r9, %r15 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: shrq %cl, %r14 |
| ; SSE-NEXT: movq %r14, 56(%rdi) |
| ; SSE-NEXT: movq %rbx, 48(%rdi) |
| ; SSE-NEXT: movq %r11, 40(%rdi) |
| ; SSE-NEXT: movq %r10, 32(%rdi) |
| ; SSE-NEXT: movq %r8, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %rsi, 8(%rdi) |
| ; SSE-NEXT: movq %r15, (%rdi) |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: popq %r15 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_i512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r15 |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %eax |
| ; AVX2-NEXT: andl $56, %eax |
| ; AVX2-NEXT: movq -112(%rsp,%rax), %rdx |
| ; AVX2-NEXT: movq -120(%rsp,%rax), %r9 |
| ; AVX2-NEXT: movq %r9, %rsi |
| ; AVX2-NEXT: shrdq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq -104(%rsp,%rax), %r8 |
| ; AVX2-NEXT: shrdq %cl, %r8, %rdx |
| ; AVX2-NEXT: movq -96(%rsp,%rax), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %r8 |
| ; AVX2-NEXT: movq -88(%rsp,%rax), %r11 |
| ; AVX2-NEXT: shrdq %cl, %r11, %r10 |
| ; AVX2-NEXT: movq -80(%rsp,%rax), %rbx |
| ; AVX2-NEXT: shrdq %cl, %rbx, %r11 |
| ; AVX2-NEXT: movq -128(%rsp,%rax), %r14 |
| ; AVX2-NEXT: movq -72(%rsp,%rax), %r15 |
| ; AVX2-NEXT: shrdq %cl, %r15, %rbx |
| ; AVX2-NEXT: shrdq %cl, %r9, %r14 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrxq %rcx, %r15, %rcx |
| ; AVX2-NEXT: movq %rcx, 56(%rdi) |
| ; AVX2-NEXT: movq %rbx, 48(%rdi) |
| ; AVX2-NEXT: movq %r11, 40(%rdi) |
| ; AVX2-NEXT: movq %r10, 32(%rdi) |
| ; AVX2-NEXT: movq %r8, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %rsi, 8(%rdi) |
| ; AVX2-NEXT: movq %r14, (%rdi) |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: popq %r15 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_i512: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: pushq %rax |
| ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; AVX512F-NEXT: vmovdqu64 %zmm1, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX512F-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX512F-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %eax, %ecx |
| ; AVX512F-NEXT: andl $63, %ecx |
| ; AVX512F-NEXT: vmovq %rcx, %xmm0 |
| ; AVX512F-NEXT: vpbroadcastq %xmm0, %xmm0 |
| ; AVX512F-NEXT: shrl $3, %eax |
| ; AVX512F-NEXT: andl $56, %eax |
| ; AVX512F-NEXT: vmovdqu64 -128(%rsp,%rax), %zmm2 |
| ; AVX512F-NEXT: vpsrlq %xmm0, %zmm2, %zmm3 |
| ; AVX512F-NEXT: valignq {{.*#+}} zmm1 = zmm2[1,2,3,4,5,6,7],zmm1[0] |
| ; AVX512F-NEXT: vpaddq %zmm1, %zmm1, %zmm1 |
| ; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpsllq %xmm0, %zmm1, %zmm0 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0 |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512F-NEXT: popq %rcx |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_i512: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: pushq %rax |
| ; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %eax, %ecx |
| ; AVX512VL-NEXT: andl $63, %ecx |
| ; AVX512VL-NEXT: vpbroadcastq %rcx, %xmm0 |
| ; AVX512VL-NEXT: shrl $3, %eax |
| ; AVX512VL-NEXT: andl $56, %eax |
| ; AVX512VL-NEXT: vmovdqu64 -128(%rsp,%rax), %zmm1 |
| ; AVX512VL-NEXT: vpsrlq %xmm0, %zmm1, %zmm2 |
| ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 |
| ; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm1[1,2,3,4,5,6,7],zmm3[0] |
| ; AVX512VL-NEXT: vpaddq %zmm1, %zmm1, %zmm1 |
| ; AVX512VL-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpsllq %xmm0, %zmm1, %zmm0 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 |
| ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, 32(%rdi) |
| ; AVX512VL-NEXT: vmovdqu %ymm0, (%rdi) |
| ; AVX512VL-NEXT: popq %rcx |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_i512: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: pushq %rax |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512VBMI-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi |
| ; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi |
| ; AVX512VBMI-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vpbroadcastq %rdi, %zmm0 |
| ; AVX512VBMI-NEXT: # kill: def $edi killed $edi killed $rdi def $rdi |
| ; AVX512VBMI-NEXT: shrl $3, %edi |
| ; AVX512VBMI-NEXT: andl $56, %edi |
| ; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm1 |
| ; AVX512VBMI-NEXT: vpxor %xmm2, %xmm2, %xmm2 |
| ; AVX512VBMI-NEXT: valignq {{.*#+}} zmm2 = zmm1[1,2,3,4,5,6,7],zmm2[0] |
| ; AVX512VBMI-NEXT: vpshrdvq %zmm0, %zmm2, %zmm1 |
| ; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm1, 32(%rax) |
| ; AVX512VBMI-NEXT: vmovdqu %ymm1, (%rax) |
| ; AVX512VBMI-NEXT: popq %rcx |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %r = lshr i512 %a0, %a1 |
| ret i512 %r |
| } |
| |
| ; Variable-amount i512 arithmetic (sign-filling) right shift. CHECK lines |
| ; are autogenerated — regenerate with utils/update_llc_test_checks.py, do |
| ; not edit them by hand. Same limb-merging structure as lshr_i512, but the |
| ; pad block above the value is filled with the sign (sarq $63 of the top |
| ; qword, stored 8 times) instead of zeros, and the top limb uses sarq / |
| ; sarxq / vpsraq so vacated bits replicate the sign. |
| define i512 @ashr_i512(i512 %a0, i512 %a1) nounwind { |
| ; SSE-LABEL: ashr_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r15 |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 |
| ; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: sarq $63, %r10 |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %eax, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %eax |
| ; SSE-NEXT: andl $56, %eax |
| ; SSE-NEXT: movq -112(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -120(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %rsi |
| ; SSE-NEXT: shrdq %cl, %rdx, %rsi |
| ; SSE-NEXT: movq -104(%rsp,%rax), %r8 |
| ; SSE-NEXT: shrdq %cl, %r8, %rdx |
| ; SSE-NEXT: movq -96(%rsp,%rax), %r10 |
| ; SSE-NEXT: shrdq %cl, %r10, %r8 |
| ; SSE-NEXT: movq -88(%rsp,%rax), %r11 |
| ; SSE-NEXT: shrdq %cl, %r11, %r10 |
| ; SSE-NEXT: movq -80(%rsp,%rax), %rbx |
| ; SSE-NEXT: shrdq %cl, %rbx, %r11 |
| ; SSE-NEXT: movq -72(%rsp,%rax), %r14 |
| ; SSE-NEXT: shrdq %cl, %r14, %rbx |
| ; SSE-NEXT: movq -128(%rsp,%rax), %r15 |
| ; SSE-NEXT: shrdq %cl, %r9, %r15 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: sarq %cl, %r14 |
| ; SSE-NEXT: movq %r14, 56(%rdi) |
| ; SSE-NEXT: movq %rbx, 48(%rdi) |
| ; SSE-NEXT: movq %r11, 40(%rdi) |
| ; SSE-NEXT: movq %r10, 32(%rdi) |
| ; SSE-NEXT: movq %r8, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %rsi, 8(%rdi) |
| ; SSE-NEXT: movq %r15, (%rdi) |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: popq %r15 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_i512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r15 |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: sarq $63, %r10 |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %eax, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %eax |
| ; AVX2-NEXT: andl $56, %eax |
| ; AVX2-NEXT: movq -112(%rsp,%rax), %rdx |
| ; AVX2-NEXT: movq -120(%rsp,%rax), %r9 |
| ; AVX2-NEXT: movq %r9, %rsi |
| ; AVX2-NEXT: shrdq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq -104(%rsp,%rax), %r8 |
| ; AVX2-NEXT: shrdq %cl, %r8, %rdx |
| ; AVX2-NEXT: movq -96(%rsp,%rax), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %r8 |
| ; AVX2-NEXT: movq -88(%rsp,%rax), %r11 |
| ; AVX2-NEXT: shrdq %cl, %r11, %r10 |
| ; AVX2-NEXT: movq -80(%rsp,%rax), %rbx |
| ; AVX2-NEXT: shrdq %cl, %rbx, %r11 |
| ; AVX2-NEXT: movq -128(%rsp,%rax), %r14 |
| ; AVX2-NEXT: movq -72(%rsp,%rax), %r15 |
| ; AVX2-NEXT: shrdq %cl, %r15, %rbx |
| ; AVX2-NEXT: shrdq %cl, %r9, %r14 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: sarxq %rcx, %r15, %rcx |
| ; AVX2-NEXT: movq %rcx, 56(%rdi) |
| ; AVX2-NEXT: movq %rbx, 48(%rdi) |
| ; AVX2-NEXT: movq %r11, 40(%rdi) |
| ; AVX2-NEXT: movq %r10, 32(%rdi) |
| ; AVX2-NEXT: movq %r8, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %rsi, 8(%rdi) |
| ; AVX2-NEXT: movq %r14, (%rdi) |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: popq %r15 |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ashr_i512: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: pushq %rax |
| ; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: sarq $63, %r10 |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %eax, %ecx |
| ; AVX512F-NEXT: andl $63, %ecx |
| ; AVX512F-NEXT: vmovq %rcx, %xmm0 |
| ; AVX512F-NEXT: vpbroadcastq %xmm0, %xmm0 |
| ; AVX512F-NEXT: shrl $3, %eax |
| ; AVX512F-NEXT: andl $56, %eax |
| ; AVX512F-NEXT: vmovdqu64 -128(%rsp,%rax), %zmm1 |
| ; AVX512F-NEXT: vpsrlq %xmm0, %zmm1, %zmm2 |
| ; AVX512F-NEXT: vpsraq $63, -72(%rsp,%rax){1to8}, %zmm3 |
| ; AVX512F-NEXT: valignq {{.*#+}} zmm1 = zmm1[1,2,3,4,5,6,7],zmm3[0] |
| ; AVX512F-NEXT: vpaddq %zmm1, %zmm1, %zmm1 |
| ; AVX512F-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpsllq %xmm0, %zmm1, %zmm0 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0 |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512F-NEXT: popq %rcx |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ashr_i512: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: pushq %rax |
| ; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rax |
| ; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: sarq $63, %r10 |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %eax, %ecx |
| ; AVX512VL-NEXT: andl $63, %ecx |
| ; AVX512VL-NEXT: vpbroadcastq %rcx, %xmm0 |
| ; AVX512VL-NEXT: shrl $3, %eax |
| ; AVX512VL-NEXT: andl $56, %eax |
| ; AVX512VL-NEXT: vmovdqu64 -128(%rsp,%rax), %zmm1 |
| ; AVX512VL-NEXT: vpsrlq %xmm0, %zmm1, %zmm2 |
| ; AVX512VL-NEXT: vpsraq $63, -72(%rsp,%rax){1to2}, %xmm3 |
| ; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm1[1,2,3,4,5,6,7],zmm3[0] |
| ; AVX512VL-NEXT: vpaddq %zmm1, %zmm1, %zmm1 |
| ; AVX512VL-NEXT: vpandnq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpsllq %xmm0, %zmm1, %zmm0 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: vporq %zmm2, %zmm0, %zmm0 |
| ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, 32(%rdi) |
| ; AVX512VL-NEXT: vmovdqu %ymm0, (%rdi) |
| ; AVX512VL-NEXT: popq %rcx |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: ashr_i512: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: pushq %rax |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi |
| ; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovups %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: sarq $63, %r10 |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vpbroadcastq %rdi, %zmm0 |
| ; AVX512VBMI-NEXT: # kill: def $edi killed $edi killed $rdi def $rdi |
| ; AVX512VBMI-NEXT: shrl $3, %edi |
| ; AVX512VBMI-NEXT: andl $56, %edi |
| ; AVX512VBMI-NEXT: vpsraq $63, -72(%rsp,%rdi){1to2}, %xmm1 |
| ; AVX512VBMI-NEXT: vmovdqu64 -128(%rsp,%rdi), %zmm2 |
| ; AVX512VBMI-NEXT: valignq {{.*#+}} zmm1 = zmm2[1,2,3,4,5,6,7],zmm1[0] |
| ; AVX512VBMI-NEXT: vpshrdvq %zmm0, %zmm1, %zmm2 |
| ; AVX512VBMI-NEXT: vextracti64x4 $1, %zmm2, 32(%rax) |
| ; AVX512VBMI-NEXT: vmovdqu %ymm2, (%rax) |
| ; AVX512VBMI-NEXT: popq %rcx |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %r = ashr i512 %a0, %a1 |
| ret i512 %r |
| } |
| |
| define i512 @shl_i512_load(ptr %p0, i512 %a1) nounwind { |
| ; SSE-LABEL: shl_i512_load: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movaps (%rsi), %xmm0 |
| ; SSE-NEXT: movaps 16(%rsi), %xmm1 |
| ; SSE-NEXT: movaps 32(%rsi), %xmm2 |
| ; SSE-NEXT: movaps 48(%rsi), %xmm3 |
| ; SSE-NEXT: xorps %xmm4, %xmm4 |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %edx, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %edx |
| ; SSE-NEXT: andl $56, %edx |
| ; SSE-NEXT: negl %edx |
| ; SSE-NEXT: movslq %edx, %rax |
| ; SSE-NEXT: movq -56(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -48(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %rsi |
| ; SSE-NEXT: shldq %cl, %rdx, %rsi |
| ; SSE-NEXT: movq -40(%rsp,%rax), %r10 |
| ; SSE-NEXT: movq %r10, %r8 |
| ; SSE-NEXT: shldq %cl, %r9, %r8 |
| ; SSE-NEXT: movq -32(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %r11 |
| ; SSE-NEXT: shldq %cl, %r10, %r11 |
| ; SSE-NEXT: movq -24(%rsp,%rax), %r10 |
| ; SSE-NEXT: movq %r10, %rbx |
| ; SSE-NEXT: shldq %cl, %r9, %rbx |
| ; SSE-NEXT: movq -16(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %r14 |
| ; SSE-NEXT: shldq %cl, %r10, %r14 |
| ; SSE-NEXT: movq -8(%rsp,%rax), %r10 |
| ; SSE-NEXT: shldq %cl, %r9, %r10 |
| ; SSE-NEXT: movq -64(%rsp,%rax), %rax |
| ; SSE-NEXT: movq %rax, %r9 |
| ; SSE-NEXT: shlq %cl, %r9 |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: shldq %cl, %rax, %rdx |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: movq %r10, 56(%rdi) |
| ; SSE-NEXT: movq %r14, 48(%rdi) |
| ; SSE-NEXT: movq %rbx, 40(%rdi) |
| ; SSE-NEXT: movq %r11, 32(%rdi) |
| ; SSE-NEXT: movq %r8, 24(%rdi) |
| ; SSE-NEXT: movq %rsi, 16(%rdi) |
| ; SSE-NEXT: movq %rdx, 8(%rdi) |
| ; SSE-NEXT: movq %r9, (%rdi) |
| ; SSE-NEXT: addq $8, %rsp |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_i512_load: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX2-NEXT: vmovups 32(%rsi), %ymm1 |
| ; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %edx, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %edx |
| ; AVX2-NEXT: andl $56, %edx |
| ; AVX2-NEXT: negl %edx |
| ; AVX2-NEXT: movslq %edx, %r8 |
| ; AVX2-NEXT: movq -56(%rsp,%r8), %rdx |
| ; AVX2-NEXT: movq -48(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %rsi |
| ; AVX2-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq -40(%rsp,%r8), %r10 |
| ; AVX2-NEXT: movq %r10, %r9 |
| ; AVX2-NEXT: shldq %cl, %rax, %r9 |
| ; AVX2-NEXT: movq -32(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %r11 |
| ; AVX2-NEXT: shldq %cl, %r10, %r11 |
| ; AVX2-NEXT: movq -24(%rsp,%r8), %r10 |
| ; AVX2-NEXT: movq %r10, %rbx |
| ; AVX2-NEXT: shldq %cl, %rax, %rbx |
| ; AVX2-NEXT: movq -16(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %r14 |
| ; AVX2-NEXT: shldq %cl, %r10, %r14 |
| ; AVX2-NEXT: movq -8(%rsp,%r8), %r10 |
| ; AVX2-NEXT: shldq %cl, %rax, %r10 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: movq -64(%rsp,%r8), %rdi |
| ; AVX2-NEXT: shlxq %rcx, %rdi, %r8 |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shldq %cl, %rdi, %rdx |
| ; AVX2-NEXT: movq %r10, 56(%rax) |
| ; AVX2-NEXT: movq %r14, 48(%rax) |
| ; AVX2-NEXT: movq %rbx, 40(%rax) |
| ; AVX2-NEXT: movq %r11, 32(%rax) |
| ; AVX2-NEXT: movq %r9, 24(%rax) |
| ; AVX2-NEXT: movq %rsi, 16(%rax) |
| ; AVX2-NEXT: movq %rdx, 8(%rax) |
| ; AVX2-NEXT: movq %r8, (%rax) |
| ; AVX2-NEXT: addq $8, %rsp |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: shl_i512_load: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movl %edx, %eax |
| ; AVX512F-NEXT: vmovq %rax, %xmm0 |
| ; AVX512F-NEXT: vpbroadcastq %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63] |
| ; AVX512F-NEXT: vpand %xmm1, %xmm0, %xmm2 |
| ; AVX512F-NEXT: shrl $6, %edx |
| ; AVX512F-NEXT: movl $-1, %eax |
| ; AVX512F-NEXT: shlxl %edx, %eax, %eax |
| ; AVX512F-NEXT: kmovw %eax, %k1 |
| ; AVX512F-NEXT: vpexpandq (%rsi), %zmm3 {%k1} {z} |
| ; AVX512F-NEXT: vpsllq %xmm2, %zmm3, %zmm2 |
| ; AVX512F-NEXT: vpandn %xmm1, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; AVX512F-NEXT: valignq {{.*#+}} zmm1 = zmm1[7],zmm3[0,1,2,3,4,5,6] |
| ; AVX512F-NEXT: vpsrlq $1, %zmm1, %zmm1 |
| ; AVX512F-NEXT: vpsrlq %xmm0, %zmm1, %zmm0 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: vporq %zmm0, %zmm2, %zmm0 |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: shl_i512_load: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: movl %edx, %ecx |
| ; AVX512VL-NEXT: vpbroadcastq %rcx, %xmm0 |
| ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63] |
| ; AVX512VL-NEXT: shrl $6, %edx |
| ; AVX512VL-NEXT: movl $-1, %ecx |
| ; AVX512VL-NEXT: shlxl %edx, %ecx, %ecx |
| ; AVX512VL-NEXT: kmovd %ecx, %k1 |
| ; AVX512VL-NEXT: vpexpandq (%rsi), %zmm2 {%k1} {z} |
| ; AVX512VL-NEXT: vpand %xmm1, %xmm0, %xmm3 |
| ; AVX512VL-NEXT: vpsllq %xmm3, %zmm2, %zmm3 |
| ; AVX512VL-NEXT: vpandn %xmm1, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm1[7],zmm2[0,1,2,3,4,5,6] |
| ; AVX512VL-NEXT: vpsrlq $1, %zmm1, %zmm1 |
| ; AVX512VL-NEXT: vpsrlq %xmm0, %zmm1, %zmm0 |
| ; AVX512VL-NEXT: vporq %zmm0, %zmm3, %zmm0 |
| ; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: shl_i512_load: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: movl %edx, %ecx |
| ; AVX512VBMI-NEXT: shrl $6, %ecx |
| ; AVX512VBMI-NEXT: movl $-1, %edi |
| ; AVX512VBMI-NEXT: shlxl %ecx, %edi, %ecx |
| ; AVX512VBMI-NEXT: kmovd %ecx, %k1 |
| ; AVX512VBMI-NEXT: vpexpandq (%rsi), %zmm0 {%k1} {z} |
| ; AVX512VBMI-NEXT: vpbroadcastq %rdx, %zmm1 |
| ; AVX512VBMI-NEXT: vpxor %xmm2, %xmm2, %xmm2 |
| ; AVX512VBMI-NEXT: valignq {{.*#+}} zmm2 = zmm2[7],zmm0[0,1,2,3,4,5,6] |
| ; AVX512VBMI-NEXT: vpshldvq %zmm1, %zmm2, %zmm0 |
| ; AVX512VBMI-NEXT: vmovdqu64 %zmm0, (%rax) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %a0 = load i512, ptr %p0 |
| %r = shl i512 %a0, %a1 |
| ret i512 %r |
| } |
| |
; Variable-amount i512 logical right shift where the value is loaded from memory
; (%p0) and the shift amount is passed as %a1. SSE/AVX2 lower via a stack
; scratch buffer (zero padding above the value, then byte-offset indexing with
; shrdq chains); AVX512 lowers branchlessly with vpcompressq + per-lane
; vpsrlq/vpsllq (or vpshrdvq with VBMI2). Assertions are autogenerated — do not
; hand-edit the CHECK lines; regenerate with utils/update_llc_test_checks.py.
define i512 @lshr_i512_load(ptr %p0, i512 %a1) nounwind {
; SSE-LABEL: lshr_i512_load:
; SSE: # %bb.0:
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: pushq %rax
; SSE-NEXT: movaps (%rsi), %xmm0
; SSE-NEXT: movaps 16(%rsi), %xmm1
; SSE-NEXT: movaps 32(%rsi), %xmm2
; SSE-NEXT: movaps 48(%rsi), %xmm3
; SSE-NEXT: xorps %xmm4, %xmm4
; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %edx, %ecx
; SSE-NEXT: andl $63, %ecx
; SSE-NEXT: shrl $3, %edx
; SSE-NEXT: andl $56, %edx
; SSE-NEXT: movq -112(%rsp,%rdx), %rsi
; SSE-NEXT: movq -120(%rsp,%rdx), %rax
; SSE-NEXT: movq %rax, %r8
; SSE-NEXT: shrdq %cl, %rsi, %r8
; SSE-NEXT: movq -104(%rsp,%rdx), %r9
; SSE-NEXT: shrdq %cl, %r9, %rsi
; SSE-NEXT: movq -96(%rsp,%rdx), %r10
; SSE-NEXT: shrdq %cl, %r10, %r9
; SSE-NEXT: movq -88(%rsp,%rdx), %r11
; SSE-NEXT: shrdq %cl, %r11, %r10
; SSE-NEXT: movq -80(%rsp,%rdx), %rbx
; SSE-NEXT: shrdq %cl, %rbx, %r11
; SSE-NEXT: movq -72(%rsp,%rdx), %r14
; SSE-NEXT: shrdq %cl, %r14, %rbx
; SSE-NEXT: movq -128(%rsp,%rdx), %rdx
; SSE-NEXT: shrdq %cl, %rax, %rdx
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
; SSE-NEXT: shrq %cl, %r14
; SSE-NEXT: movq %r14, 56(%rdi)
; SSE-NEXT: movq %rbx, 48(%rdi)
; SSE-NEXT: movq %r11, 40(%rdi)
; SSE-NEXT: movq %r10, 32(%rdi)
; SSE-NEXT: movq %r9, 24(%rdi)
; SSE-NEXT: movq %rsi, 16(%rdi)
; SSE-NEXT: movq %r8, 8(%rdi)
; SSE-NEXT: movq %rdx, (%rdi)
; SSE-NEXT: addq $8, %rsp
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
; AVX2-LABEL: lshr_i512_load:
; AVX2: # %bb.0:
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: pushq %rax
; AVX2-NEXT: vmovups (%rsi), %ymm0
; AVX2-NEXT: vmovups 32(%rsi), %ymm1
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %edx, %ecx
; AVX2-NEXT: andl $63, %ecx
; AVX2-NEXT: shrl $3, %edx
; AVX2-NEXT: andl $56, %edx
; AVX2-NEXT: movq -112(%rsp,%rdx), %rsi
; AVX2-NEXT: movq -120(%rsp,%rdx), %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: shrdq %cl, %rsi, %r8
; AVX2-NEXT: movq -104(%rsp,%rdx), %r9
; AVX2-NEXT: shrdq %cl, %r9, %rsi
; AVX2-NEXT: movq -96(%rsp,%rdx), %r10
; AVX2-NEXT: shrdq %cl, %r10, %r9
; AVX2-NEXT: movq -88(%rsp,%rdx), %r11
; AVX2-NEXT: shrdq %cl, %r11, %r10
; AVX2-NEXT: movq -80(%rsp,%rdx), %rbx
; AVX2-NEXT: shrdq %cl, %rbx, %r11
; AVX2-NEXT: movq -128(%rsp,%rdx), %r14
; AVX2-NEXT: movq -72(%rsp,%rdx), %rdx
; AVX2-NEXT: shrdq %cl, %rdx, %rbx
; AVX2-NEXT: shrdq %cl, %rax, %r14
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: shrxq %rcx, %rdx, %rcx
; AVX2-NEXT: movq %rcx, 56(%rdi)
; AVX2-NEXT: movq %rbx, 48(%rdi)
; AVX2-NEXT: movq %r11, 40(%rdi)
; AVX2-NEXT: movq %r10, 32(%rdi)
; AVX2-NEXT: movq %r9, 24(%rdi)
; AVX2-NEXT: movq %rsi, 16(%rdi)
; AVX2-NEXT: movq %r8, 8(%rdi)
; AVX2-NEXT: movq %r14, (%rdi)
; AVX2-NEXT: addq $8, %rsp
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r14
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: lshr_i512_load:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqu64 (%rsi), %zmm0
; AVX512F-NEXT: movl %edx, %eax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm2 = [63,63]
; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm3
; AVX512F-NEXT: shrl $6, %edx
; AVX512F-NEXT: movl $-1, %eax
; AVX512F-NEXT: shlxl %edx, %eax, %eax
; AVX512F-NEXT: kmovw %eax, %k1
; AVX512F-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpsrlq %xmm3, %zmm0, %zmm3
; AVX512F-NEXT: vpandn %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512F-NEXT: valignq {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7],zmm2[0]
; AVX512F-NEXT: vpaddq %zmm0, %zmm0, %zmm0
; AVX512F-NEXT: vpsllq %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: lshr_i512_load:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: vmovdqu64 (%rsi), %zmm0
; AVX512VL-NEXT: movl %edx, %ecx
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm1 = [63,63]
; AVX512VL-NEXT: vpbroadcastq %rcx, %xmm2
; AVX512VL-NEXT: shrl $6, %edx
; AVX512VL-NEXT: movl $-1, %ecx
; AVX512VL-NEXT: shlxl %edx, %ecx, %ecx
; AVX512VL-NEXT: kmovd %ecx, %k1
; AVX512VL-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpand %xmm1, %xmm2, %xmm3
; AVX512VL-NEXT: vpsrlq %xmm3, %zmm0, %zmm3
; AVX512VL-NEXT: vpandn %xmm1, %xmm2, %xmm1
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: valignq {{.*#+}} zmm0 = zmm0[1,2,3,4,5,6,7],zmm2[0]
; AVX512VL-NEXT: vpaddq %zmm0, %zmm0, %zmm0
; AVX512VL-NEXT: vpsllq %xmm1, %zmm0, %zmm0
; AVX512VL-NEXT: vporq %zmm3, %zmm0, %zmm0
; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: lshr_i512_load:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vmovdqu64 (%rsi), %zmm0
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: movl %edx, %ecx
; AVX512VBMI-NEXT: shrl $6, %ecx
; AVX512VBMI-NEXT: movl $-1, %esi
; AVX512VBMI-NEXT: shlxl %ecx, %esi, %ecx
; AVX512VBMI-NEXT: kmovd %ecx, %k1
; AVX512VBMI-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z}
; AVX512VBMI-NEXT: vpbroadcastq %rdx, %zmm1
; AVX512VBMI-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm2 = zmm0[1,2,3,4,5,6,7],zmm2[0]
; AVX512VBMI-NEXT: vpshrdvq %zmm1, %zmm2, %zmm0
; AVX512VBMI-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
  %a0 = load i512, ptr %p0
  %r = lshr i512 %a0, %a1
  ret i512 %r
}
| |
; Variable-amount i512 arithmetic right shift from a loaded value. Like
; lshr_i512_load, but the scratch padding above the value is the sign word
; (sarq $63 of the top limb on SSE/AVX2; vpsraq $63 + vpermq broadcast of lane 7
; on AVX512), and the final top-limb shift is sarq/sarxq instead of shrq/shrxq.
; Autogenerated CHECK lines — regenerate with utils/update_llc_test_checks.py.
define i512 @ashr_i512_load(ptr %p0, i512 %a1) nounwind {
; SSE-LABEL: ashr_i512_load:
; SSE: # %bb.0:
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: pushq %rax
; SSE-NEXT: movaps (%rsi), %xmm0
; SSE-NEXT: movaps 16(%rsi), %xmm1
; SSE-NEXT: movaps 32(%rsi), %xmm2
; SSE-NEXT: movq 48(%rsi), %rax
; SSE-NEXT: movq 56(%rsi), %rcx
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: sarq $63, %rcx
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %edx, %ecx
; SSE-NEXT: andl $63, %ecx
; SSE-NEXT: shrl $3, %edx
; SSE-NEXT: andl $56, %edx
; SSE-NEXT: movq -112(%rsp,%rdx), %rsi
; SSE-NEXT: movq -120(%rsp,%rdx), %rax
; SSE-NEXT: movq %rax, %r8
; SSE-NEXT: shrdq %cl, %rsi, %r8
; SSE-NEXT: movq -104(%rsp,%rdx), %r9
; SSE-NEXT: shrdq %cl, %r9, %rsi
; SSE-NEXT: movq -96(%rsp,%rdx), %r10
; SSE-NEXT: shrdq %cl, %r10, %r9
; SSE-NEXT: movq -88(%rsp,%rdx), %r11
; SSE-NEXT: shrdq %cl, %r11, %r10
; SSE-NEXT: movq -80(%rsp,%rdx), %rbx
; SSE-NEXT: shrdq %cl, %rbx, %r11
; SSE-NEXT: movq -72(%rsp,%rdx), %r14
; SSE-NEXT: shrdq %cl, %r14, %rbx
; SSE-NEXT: movq -128(%rsp,%rdx), %rdx
; SSE-NEXT: shrdq %cl, %rax, %rdx
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: # kill: def $cl killed $cl killed $ecx
; SSE-NEXT: sarq %cl, %r14
; SSE-NEXT: movq %r14, 56(%rdi)
; SSE-NEXT: movq %rbx, 48(%rdi)
; SSE-NEXT: movq %r11, 40(%rdi)
; SSE-NEXT: movq %r10, 32(%rdi)
; SSE-NEXT: movq %r9, 24(%rdi)
; SSE-NEXT: movq %rsi, 16(%rdi)
; SSE-NEXT: movq %r8, 8(%rdi)
; SSE-NEXT: movq %rdx, (%rdi)
; SSE-NEXT: addq $8, %rsp
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
; AVX2-LABEL: ashr_i512_load:
; AVX2: # %bb.0:
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: pushq %rax
; AVX2-NEXT: vmovups (%rsi), %ymm0
; AVX2-NEXT: vmovaps 32(%rsi), %xmm1
; AVX2-NEXT: movq 48(%rsi), %rax
; AVX2-NEXT: movq 56(%rsi), %rcx
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: sarq $63, %rcx
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %edx, %ecx
; AVX2-NEXT: andl $63, %ecx
; AVX2-NEXT: shrl $3, %edx
; AVX2-NEXT: andl $56, %edx
; AVX2-NEXT: movq -112(%rsp,%rdx), %rsi
; AVX2-NEXT: movq -120(%rsp,%rdx), %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: shrdq %cl, %rsi, %r8
; AVX2-NEXT: movq -104(%rsp,%rdx), %r9
; AVX2-NEXT: shrdq %cl, %r9, %rsi
; AVX2-NEXT: movq -96(%rsp,%rdx), %r10
; AVX2-NEXT: shrdq %cl, %r10, %r9
; AVX2-NEXT: movq -88(%rsp,%rdx), %r11
; AVX2-NEXT: shrdq %cl, %r11, %r10
; AVX2-NEXT: movq -80(%rsp,%rdx), %rbx
; AVX2-NEXT: shrdq %cl, %rbx, %r11
; AVX2-NEXT: movq -128(%rsp,%rdx), %r14
; AVX2-NEXT: movq -72(%rsp,%rdx), %rdx
; AVX2-NEXT: shrdq %cl, %rdx, %rbx
; AVX2-NEXT: shrdq %cl, %rax, %r14
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: sarxq %rcx, %rdx, %rcx
; AVX2-NEXT: movq %rcx, 56(%rdi)
; AVX2-NEXT: movq %rbx, 48(%rdi)
; AVX2-NEXT: movq %r11, 40(%rdi)
; AVX2-NEXT: movq %r10, 32(%rdi)
; AVX2-NEXT: movq %r9, 24(%rdi)
; AVX2-NEXT: movq %rsi, 16(%rdi)
; AVX2-NEXT: movq %r8, 8(%rdi)
; AVX2-NEXT: movq %r14, (%rdi)
; AVX2-NEXT: addq $8, %rsp
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r14
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ashr_i512_load:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqu64 (%rsi), %zmm0
; AVX512F-NEXT: vpsraq $63, %zmm0, %zmm1
; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7]
; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
; AVX512F-NEXT: movl %edx, %eax
; AVX512F-NEXT: shrl $6, %edx
; AVX512F-NEXT: movl $-1, %ecx
; AVX512F-NEXT: shlxl %edx, %ecx, %ecx
; AVX512F-NEXT: kmovw %ecx, %k1
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm2
; AVX512F-NEXT: vpcompressq %zmm0, %zmm2 {%k1}
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpbroadcastq %xmm0, %xmm0
; AVX512F-NEXT: vpbroadcastq {{.*#+}} xmm3 = [63,63]
; AVX512F-NEXT: vpand %xmm3, %xmm0, %xmm4
; AVX512F-NEXT: vpsrlq %xmm4, %zmm2, %zmm4
; AVX512F-NEXT: valignq {{.*#+}} zmm1 = zmm2[1,2,3,4,5,6,7],zmm1[0]
; AVX512F-NEXT: vpaddq %zmm1, %zmm1, %zmm1
; AVX512F-NEXT: vpandn %xmm3, %xmm0, %xmm0
; AVX512F-NEXT: vpsllq %xmm0, %zmm1, %zmm0
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: vporq %zmm4, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: ashr_i512_load:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: vmovdqu64 (%rsi), %zmm0
; AVX512VL-NEXT: vpsraq $63, %zmm0, %zmm1
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7]
; AVX512VL-NEXT: vpermq %zmm1, %zmm2, %zmm1
; AVX512VL-NEXT: movl %edx, %ecx
; AVX512VL-NEXT: shrl $6, %edx
; AVX512VL-NEXT: movl $-1, %esi
; AVX512VL-NEXT: shlxl %edx, %esi, %edx
; AVX512VL-NEXT: kmovd %edx, %k1
; AVX512VL-NEXT: vmovdqa64 %zmm1, %zmm2
; AVX512VL-NEXT: vpcompressq %zmm0, %zmm2 {%k1}
; AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm0 = [63,63]
; AVX512VL-NEXT: vpbroadcastq %rcx, %xmm3
; AVX512VL-NEXT: vpand %xmm0, %xmm3, %xmm4
; AVX512VL-NEXT: vpsrlq %xmm4, %zmm2, %zmm4
; AVX512VL-NEXT: valignq {{.*#+}} zmm1 = zmm2[1,2,3,4,5,6,7],zmm1[0]
; AVX512VL-NEXT: vpaddq %zmm1, %zmm1, %zmm1
; AVX512VL-NEXT: vpandn %xmm0, %xmm3, %xmm0
; AVX512VL-NEXT: vpsllq %xmm0, %zmm1, %zmm0
; AVX512VL-NEXT: vporq %zmm4, %zmm0, %zmm0
; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: ashr_i512_load:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: vmovdqu64 (%rsi), %zmm0
; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm1 = [7,7,7,7,7,7,7,7]
; AVX512VBMI-NEXT: vpsraq $63, %zmm0, %zmm2
; AVX512VBMI-NEXT: vpermq %zmm2, %zmm1, %zmm1
; AVX512VBMI-NEXT: vpbroadcastq %rdx, %zmm2
; AVX512VBMI-NEXT: movl %edx, %ecx
; AVX512VBMI-NEXT: shrl $6, %ecx
; AVX512VBMI-NEXT: movl $-1, %edx
; AVX512VBMI-NEXT: shlxl %ecx, %edx, %ecx
; AVX512VBMI-NEXT: kmovd %ecx, %k1
; AVX512VBMI-NEXT: vmovdqa64 %zmm1, %zmm3
; AVX512VBMI-NEXT: vpcompressq %zmm0, %zmm3 {%k1}
; AVX512VBMI-NEXT: valignq {{.*#+}} zmm0 = zmm3[1,2,3,4,5,6,7],zmm1[0]
; AVX512VBMI-NEXT: vpshrdvq %zmm2, %zmm0, %zmm3
; AVX512VBMI-NEXT: vmovdqu64 %zmm3, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
  %a0 = load i512, ptr %p0
  %r = ashr i512 %a0, %a1
  ret i512 %r
}
| |
; i512 shl by constant 1: all targets share one CHECK block — a chain of
; shldq $1 across the eight limbs plus addq for the lowest limb; no stack
; scratch buffer is needed. Autogenerated assertions.
define i512 @shl_i512_1(i512 %a0) nounwind {
; CHECK-LABEL: shl_i512_1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11
; CHECK-NEXT: shldq $1, %rdi, %r10
; CHECK-NEXT: shldq $1, %r11, %rdi
; CHECK-NEXT: shldq $1, %r9, %r11
; CHECK-NEXT: shldq $1, %r8, %r9
; CHECK-NEXT: shldq $1, %rcx, %r8
; CHECK-NEXT: shldq $1, %rdx, %rcx
; CHECK-NEXT: shldq $1, %rsi, %rdx
; CHECK-NEXT: addq %rsi, %rsi
; CHECK-NEXT: movq %r10, 56(%rax)
; CHECK-NEXT: movq %rdi, 48(%rax)
; CHECK-NEXT: movq %r11, 40(%rax)
; CHECK-NEXT: movq %r9, 32(%rax)
; CHECK-NEXT: movq %r8, 24(%rax)
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rdx, 8(%rax)
; CHECK-NEXT: movq %rsi, (%rax)
; CHECK-NEXT: retq
  %r = shl i512 %a0, 1
  ret i512 %r
}
| |
; i512 lshr by constant 1: shared CHECK block for all targets — a shrdq $1
; chain across the limbs with a final shrq on the top limb.
; Autogenerated assertions.
define i512 @lshr_i512_1(i512 %a0) nounwind {
; CHECK-LABEL: lshr_i512_1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11
; CHECK-NEXT: shrdq $1, %rdx, %rsi
; CHECK-NEXT: shrdq $1, %rcx, %rdx
; CHECK-NEXT: shrdq $1, %r8, %rcx
; CHECK-NEXT: shrdq $1, %r9, %r8
; CHECK-NEXT: shrdq $1, %r11, %r9
; CHECK-NEXT: shrdq $1, %rdi, %r11
; CHECK-NEXT: shrdq $1, %r10, %rdi
; CHECK-NEXT: shrq %r10
; CHECK-NEXT: movq %r10, 56(%rax)
; CHECK-NEXT: movq %rdi, 48(%rax)
; CHECK-NEXT: movq %r11, 40(%rax)
; CHECK-NEXT: movq %r9, 32(%rax)
; CHECK-NEXT: movq %r8, 24(%rax)
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rdx, 8(%rax)
; CHECK-NEXT: movq %rsi, (%rax)
; CHECK-NEXT: retq
  %r = lshr i512 %a0, 1
  ret i512 %r
}
| |
; i512 ashr by constant 1: identical to lshr_i512_1 except the top limb uses
; sarq (arithmetic) instead of shrq. Autogenerated assertions.
define i512 @ashr_i512_1(i512 %a0) nounwind {
; CHECK-LABEL: ashr_i512_1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11
; CHECK-NEXT: shrdq $1, %rdx, %rsi
; CHECK-NEXT: shrdq $1, %rcx, %rdx
; CHECK-NEXT: shrdq $1, %r8, %rcx
; CHECK-NEXT: shrdq $1, %r9, %r8
; CHECK-NEXT: shrdq $1, %r11, %r9
; CHECK-NEXT: shrdq $1, %rdi, %r11
; CHECK-NEXT: shrdq $1, %r10, %rdi
; CHECK-NEXT: sarq %r10
; CHECK-NEXT: movq %r10, 56(%rax)
; CHECK-NEXT: movq %rdi, 48(%rax)
; CHECK-NEXT: movq %r11, 40(%rax)
; CHECK-NEXT: movq %r9, 32(%rax)
; CHECK-NEXT: movq %r8, 24(%rax)
; CHECK-NEXT: movq %rcx, 16(%rax)
; CHECK-NEXT: movq %rdx, 8(%rax)
; CHECK-NEXT: movq %rsi, (%rax)
; CHECK-NEXT: retq
  %r = ashr i512 %a0, 1
  ret i512 %r
}
| |
; i512 shl by constant 200 (= 3*64 + 8): a 3-limb whole-word move plus an
; 8-bit sub-shift, realised with shrdq $56 / shldq $8 / shlq $8, and the low
; 24 bytes zeroed. Targets differ only in how the zero store is emitted
; (xorps vs vxorps). Autogenerated assertions.
define i512 @shl_i512_200(i512 %a0) nounwind {
; SSE-LABEL: shl_i512_200:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq %rsi, %rdi
; SSE-NEXT: shrdq $56, %rdx, %rdi
; SSE-NEXT: shrdq $56, %rcx, %rdx
; SSE-NEXT: shrdq $56, %r8, %rcx
; SSE-NEXT: shldq $8, %r8, %r9
; SSE-NEXT: shlq $8, %rsi
; SSE-NEXT: movq %r9, 56(%rax)
; SSE-NEXT: movq %rcx, 48(%rax)
; SSE-NEXT: movq %rdx, 40(%rax)
; SSE-NEXT: movq %rdi, 32(%rax)
; SSE-NEXT: movq %rsi, 24(%rax)
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, (%rax)
; SSE-NEXT: movq $0, 16(%rax)
; SSE-NEXT: retq
;
; AVX2-LABEL: shl_i512_200:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq %rsi, %rdi
; AVX2-NEXT: shrdq $56, %rdx, %rdi
; AVX2-NEXT: shrdq $56, %rcx, %rdx
; AVX2-NEXT: shrdq $56, %r8, %rcx
; AVX2-NEXT: shldq $8, %r8, %r9
; AVX2-NEXT: shlq $8, %rsi
; AVX2-NEXT: movq %r9, 56(%rax)
; AVX2-NEXT: movq %rcx, 48(%rax)
; AVX2-NEXT: movq %rdx, 40(%rax)
; AVX2-NEXT: movq %rdi, 32(%rax)
; AVX2-NEXT: movq %rsi, 24(%rax)
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovaps %xmm0, (%rax)
; AVX2-NEXT: movq $0, 16(%rax)
; AVX2-NEXT: retq
;
; AVX512-LABEL: shl_i512_200:
; AVX512: # %bb.0:
; AVX512-NEXT: movq %rdi, %rax
; AVX512-NEXT: movq %rsi, %rdi
; AVX512-NEXT: shrdq $56, %rdx, %rdi
; AVX512-NEXT: shrdq $56, %rcx, %rdx
; AVX512-NEXT: shrdq $56, %r8, %rcx
; AVX512-NEXT: shldq $8, %r8, %r9
; AVX512-NEXT: shlq $8, %rsi
; AVX512-NEXT: movq %r9, 56(%rax)
; AVX512-NEXT: movq %rcx, 48(%rax)
; AVX512-NEXT: movq %rdx, 40(%rax)
; AVX512-NEXT: movq %rdi, 32(%rax)
; AVX512-NEXT: movq %rsi, 24(%rax)
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovaps %xmm0, (%rax)
; AVX512-NEXT: movq $0, 16(%rax)
; AVX512-NEXT: retq
  %r = shl i512 %a0, 200
  ret i512 %r
}
| |
; i512 lshr by constant 200: shrdq $8 chain over the surviving limbs with a
; final shrq $8, and the high 24 bytes zeroed. Targets differ only in the
; zero-store instructions. Autogenerated assertions.
define i512 @lshr_i512_200(i512 %a0) nounwind {
; SSE-LABEL: lshr_i512_200:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SSE-NEXT: shrdq $8, %r9, %r8
; SSE-NEXT: shrdq $8, %rsi, %r9
; SSE-NEXT: shrdq $8, %rcx, %rsi
; SSE-NEXT: shrdq $8, %rdx, %rcx
; SSE-NEXT: shrq $8, %rdx
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movups %xmm0, 40(%rdi)
; SSE-NEXT: movq %rdx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rsi, 16(%rdi)
; SSE-NEXT: movq %r9, 8(%rdi)
; SSE-NEXT: movq %r8, (%rdi)
; SSE-NEXT: movq $0, 56(%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: lshr_i512_200:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; AVX2-NEXT: shrdq $8, %r9, %r8
; AVX2-NEXT: shrdq $8, %rsi, %r9
; AVX2-NEXT: shrdq $8, %rcx, %rsi
; AVX2-NEXT: shrdq $8, %rdx, %rcx
; AVX2-NEXT: shrq $8, %rdx
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovups %xmm0, 40(%rdi)
; AVX2-NEXT: movq %rdx, 32(%rdi)
; AVX2-NEXT: movq %rcx, 24(%rdi)
; AVX2-NEXT: movq %rsi, 16(%rdi)
; AVX2-NEXT: movq %r9, 8(%rdi)
; AVX2-NEXT: movq %r8, (%rdi)
; AVX2-NEXT: movq $0, 56(%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: lshr_i512_200:
; AVX512: # %bb.0:
; AVX512-NEXT: movq %rdi, %rax
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; AVX512-NEXT: shrdq $8, %r9, %r8
; AVX512-NEXT: shrdq $8, %rsi, %r9
; AVX512-NEXT: shrdq $8, %rcx, %rsi
; AVX512-NEXT: shrdq $8, %rdx, %rcx
; AVX512-NEXT: shrq $8, %rdx
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovups %xmm0, 40(%rdi)
; AVX512-NEXT: movq %rdx, 32(%rdi)
; AVX512-NEXT: movq %rcx, 24(%rdi)
; AVX512-NEXT: movq %rsi, 16(%rdi)
; AVX512-NEXT: movq %r9, 8(%rdi)
; AVX512-NEXT: movq %r8, (%rdi)
; AVX512-NEXT: movq $0, 56(%rdi)
; AVX512-NEXT: retq
  %r = lshr i512 %a0, 200
  ret i512 %r
}
| |
; i512 ashr by constant 200: shared CHECK block for all targets — shrdq $8
; chain, sarq $8 for the shifted top limb, and the sign (sarq $63) replicated
; into the three highest output limbs. Autogenerated assertions.
define i512 @ashr_i512_200(i512 %a0) nounwind {
; CHECK-LABEL: ashr_i512_200:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdx
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; CHECK-NEXT: shrdq $8, %r9, %r8
; CHECK-NEXT: shrdq $8, %rsi, %r9
; CHECK-NEXT: shrdq $8, %rcx, %rsi
; CHECK-NEXT: shrdq $8, %rdx, %rcx
; CHECK-NEXT: movq %rdx, %rdi
; CHECK-NEXT: sarq $8, %rdi
; CHECK-NEXT: sarq $63, %rdx
; CHECK-NEXT: movq %rdx, 56(%rax)
; CHECK-NEXT: movq %rdx, 48(%rax)
; CHECK-NEXT: movq %rdx, 40(%rax)
; CHECK-NEXT: movq %rdi, 32(%rax)
; CHECK-NEXT: movq %rcx, 24(%rax)
; CHECK-NEXT: movq %rsi, 16(%rax)
; CHECK-NEXT: movq %r9, 8(%rax)
; CHECK-NEXT: movq %r8, (%rax)
; CHECK-NEXT: retq
  %r = ashr i512 %a0, 200
  ret i512 %r
}
| |
; i512 shl by the maximum amount 511: only bit 0 of the input survives, so
; codegen is shlq $63 of the low limb stored at offset 56, everything else
; zeroed. AVX512F omits the trailing vzeroupper (KNL target); the other AVX
; targets emit it. Autogenerated assertions.
define i512 @shl_i512_511(i512 %a0) nounwind {
; SSE-LABEL: shl_i512_511:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: shlq $63, %rsi
; SSE-NEXT: movq %rsi, 56(%rdi)
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, 32(%rdi)
; SSE-NEXT: movaps %xmm0, 16(%rdi)
; SSE-NEXT: movaps %xmm0, (%rdi)
; SSE-NEXT: movq $0, 48(%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: shl_i512_511:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: shlq $63, %rsi
; AVX2-NEXT: movq %rsi, 56(%rdi)
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: movq $0, 48(%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: shl_i512_511:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: shlq $63, %rsi
; AVX512F-NEXT: movq %rsi, 56(%rdi)
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovups %ymm0, (%rdi)
; AVX512F-NEXT: movq $0, 48(%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shl_i512_511:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: shlq $63, %rsi
; AVX512VL-NEXT: movq %rsi, 56(%rdi)
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovups %ymm0, (%rdi)
; AVX512VL-NEXT: movq $0, 48(%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: shl_i512_511:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: shlq $63, %rsi
; AVX512VBMI-NEXT: movq %rsi, 56(%rdi)
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovaps %xmm0, 32(%rdi)
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovups %ymm0, (%rdi)
; AVX512VBMI-NEXT: movq $0, 48(%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
  %r = shl i512 %a0, 511
  ret i512 %r
}
| |
; i512 lshr by 511: only the sign bit of the top limb survives — shrq $63 of
; the highest stack argument stored at offset 0, everything else zeroed.
; Autogenerated assertions.
define i512 @lshr_i512_511(i512 %a0) nounwind {
; SSE-LABEL: lshr_i512_511:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT: shrq $63, %rcx
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movups %xmm0, 40(%rdi)
; SSE-NEXT: movups %xmm0, 24(%rdi)
; SSE-NEXT: movups %xmm0, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: movq $0, 56(%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: lshr_i512_511:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; AVX2-NEXT: shrq $63, %rcx
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovups %xmm0, 40(%rdi)
; AVX2-NEXT: movq %rcx, (%rdi)
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovups %ymm0, 8(%rdi)
; AVX2-NEXT: movq $0, 56(%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: lshr_i512_511:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; AVX512F-NEXT: shrq $63, %rcx
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovups %xmm0, 40(%rdi)
; AVX512F-NEXT: movq %rcx, (%rdi)
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovups %ymm0, 8(%rdi)
; AVX512F-NEXT: movq $0, 56(%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: lshr_i512_511:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovups %xmm0, 40(%rdi)
; AVX512VL-NEXT: shrq $63, %rcx
; AVX512VL-NEXT: movq %rcx, (%rdi)
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovups %ymm0, 8(%rdi)
; AVX512VL-NEXT: movq $0, 56(%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: lshr_i512_511:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovups %xmm0, 40(%rdi)
; AVX512VBMI-NEXT: shrq $63, %rcx
; AVX512VBMI-NEXT: movq %rcx, (%rdi)
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovups %ymm0, 8(%rdi)
; AVX512VBMI-NEXT: movq $0, 56(%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
  %r = lshr i512 %a0, 511
  ret i512 %r
}
| |
; i512 ashr by 511: the result is the sign bit broadcast to every limb —
; one sarq $63 of the top limb, stored eight times. Shared CHECK block.
; Autogenerated assertions.
define i512 @ashr_i512_511(i512 %a0) nounwind {
; CHECK-LABEL: ashr_i512_511:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT: sarq $63, %rcx
; CHECK-NEXT: movq %rcx, 56(%rdi)
; CHECK-NEXT: movq %rcx, 48(%rdi)
; CHECK-NEXT: movq %rcx, 40(%rdi)
; CHECK-NEXT: movq %rcx, 32(%rdi)
; CHECK-NEXT: movq %rcx, 24(%rdi)
; CHECK-NEXT: movq %rcx, 16(%rdi)
; CHECK-NEXT: movq %rcx, 8(%rdi)
; CHECK-NEXT: movq %rcx, (%rdi)
; CHECK-NEXT: retq
  %r = ashr i512 %a0, 511
  ret i512 %r
}
| |
| ; Shift a constant 1 left by a variable amount (%a0 in %rsi). SSE/AVX2 use |
| ; the generic lowering: spill the constant plus a zero guard region to the |
| ; stack, index by the negated byte offset ((amt>>3)&56), then funnel-shift |
| ; adjacent limbs with shldq by amt&63. AVX512 recognizes the 1<<x pattern |
| ; and builds the single set limb directly: shlxq computes 1<<(amt&63) (shlxq |
| ; uses only the low 6 bits of the count), 1<<(amt>>6) forms a one-hot k-mask, |
| ; and a zero-masked vpbroadcastq places the limb in the 512-bit result. |
| define i512 @shl_1_i512(i512 %a0) nounwind { |
| ; SSE-LABEL: shl_1_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: andl $56, %esi |
| ; SSE-NEXT: negl %esi |
| ; SSE-NEXT: movslq %esi, %rax |
| ; SSE-NEXT: movq -56(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -48(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %rsi |
| ; SSE-NEXT: shldq %cl, %rdx, %rsi |
| ; SSE-NEXT: movq -40(%rsp,%rax), %r10 |
| ; SSE-NEXT: movq %r10, %r8 |
| ; SSE-NEXT: shldq %cl, %r9, %r8 |
| ; SSE-NEXT: movq -32(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %r11 |
| ; SSE-NEXT: shldq %cl, %r10, %r11 |
| ; SSE-NEXT: movq -24(%rsp,%rax), %r10 |
| ; SSE-NEXT: movq %r10, %rbx |
| ; SSE-NEXT: shldq %cl, %r9, %rbx |
| ; SSE-NEXT: movq -16(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %r14 |
| ; SSE-NEXT: shldq %cl, %r10, %r14 |
| ; SSE-NEXT: movq -8(%rsp,%rax), %r10 |
| ; SSE-NEXT: shldq %cl, %r9, %r10 |
| ; SSE-NEXT: movq -64(%rsp,%rax), %rax |
| ; SSE-NEXT: movq %rax, %r9 |
| ; SSE-NEXT: shlq %cl, %r9 |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: shldq %cl, %rax, %rdx |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: movq %r10, 56(%rdi) |
| ; SSE-NEXT: movq %r14, 48(%rdi) |
| ; SSE-NEXT: movq %rbx, 40(%rdi) |
| ; SSE-NEXT: movq %r11, 32(%rdi) |
| ; SSE-NEXT: movq %r8, 24(%rdi) |
| ; SSE-NEXT: movq %rsi, 16(%rdi) |
| ; SSE-NEXT: movq %rdx, 8(%rdi) |
| ; SSE-NEXT: movq %r9, (%rdi) |
| ; SSE-NEXT: addq $8, %rsp |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_1_i512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0] |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %esi, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %esi |
| ; AVX2-NEXT: andl $56, %esi |
| ; AVX2-NEXT: negl %esi |
| ; AVX2-NEXT: movslq %esi, %r8 |
| ; AVX2-NEXT: movq -56(%rsp,%r8), %rdx |
| ; AVX2-NEXT: movq -48(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %rsi |
| ; AVX2-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq -40(%rsp,%r8), %r10 |
| ; AVX2-NEXT: movq %r10, %r9 |
| ; AVX2-NEXT: shldq %cl, %rax, %r9 |
| ; AVX2-NEXT: movq -32(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %r11 |
| ; AVX2-NEXT: shldq %cl, %r10, %r11 |
| ; AVX2-NEXT: movq -24(%rsp,%r8), %r10 |
| ; AVX2-NEXT: movq %r10, %rbx |
| ; AVX2-NEXT: shldq %cl, %rax, %rbx |
| ; AVX2-NEXT: movq -16(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %r14 |
| ; AVX2-NEXT: shldq %cl, %r10, %r14 |
| ; AVX2-NEXT: movq -8(%rsp,%r8), %r10 |
| ; AVX2-NEXT: shldq %cl, %rax, %r10 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: movq -64(%rsp,%r8), %rdi |
| ; AVX2-NEXT: shlxq %rcx, %rdi, %r8 |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shldq %cl, %rdi, %rdx |
| ; AVX2-NEXT: movq %r10, 56(%rax) |
| ; AVX2-NEXT: movq %r14, 48(%rax) |
| ; AVX2-NEXT: movq %rbx, 40(%rax) |
| ; AVX2-NEXT: movq %r11, 32(%rax) |
| ; AVX2-NEXT: movq %r9, 24(%rax) |
| ; AVX2-NEXT: movq %rsi, 16(%rax) |
| ; AVX2-NEXT: movq %rdx, 8(%rax) |
| ; AVX2-NEXT: movq %r8, (%rax) |
| ; AVX2-NEXT: addq $8, %rsp |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: shl_1_i512: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movl $1, %ecx |
| ; AVX512F-NEXT: shlxq %rsi, %rcx, %rdx |
| ; AVX512F-NEXT: shrl $6, %esi |
| ; AVX512F-NEXT: shlxq %rsi, %rcx, %rcx |
| ; AVX512F-NEXT: kmovw %ecx, %k1 |
| ; AVX512F-NEXT: vpbroadcastq %rdx, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: shl_1_i512: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: movl $1, %ecx |
| ; AVX512VL-NEXT: shlxq %rsi, %rcx, %rdx |
| ; AVX512VL-NEXT: shrl $6, %esi |
| ; AVX512VL-NEXT: shlxq %rsi, %rcx, %rcx |
| ; AVX512VL-NEXT: kmovd %ecx, %k1 |
| ; AVX512VL-NEXT: vpbroadcastq %rdx, %zmm0 {%k1} {z} |
| ; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: shl_1_i512: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: movl $1, %ecx |
| ; AVX512VBMI-NEXT: shlxq %rsi, %rcx, %rdx |
| ; AVX512VBMI-NEXT: shrl $6, %esi |
| ; AVX512VBMI-NEXT: shlxq %rsi, %rcx, %rcx |
| ; AVX512VBMI-NEXT: kmovd %ecx, %k1 |
| ; AVX512VBMI-NEXT: vpbroadcastq %rdx, %zmm0 {%k1} {z} |
| ; AVX512VBMI-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %r = shl i512 1, %a0 |
| ret i512 %r |
| } |
| |
| ; Logical right shift of (1 << 511) by a variable amount. SSE/AVX2 store the |
| ; precomputed sign-bit-only constant plus a zero guard region to the stack, |
| ; index by the byte offset (amt>>3)&56, and funnel-shift limb pairs with |
| ; shrdq by amt&63. AVX512 materializes the single result limb directly: |
| ; shrxq shifts 0x8000000000000000 by amt&63 (shrxq uses only the low 6 count |
| ; bits) and 128>>(amt>>6) forms a one-hot k-mask selecting the destination |
| ; limb for a zero-masked vpbroadcastq. |
| define i512 @lshr_signbit_i512(i512 %a0) nounwind { |
| ; SSE-LABEL: lshr_signbit_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: andl $56, %esi |
| ; SSE-NEXT: movq -112(%rsp,%rsi), %rdx |
| ; SSE-NEXT: movq -120(%rsp,%rsi), %rax |
| ; SSE-NEXT: movq %rax, %r8 |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -104(%rsp,%rsi), %r9 |
| ; SSE-NEXT: shrdq %cl, %r9, %rdx |
| ; SSE-NEXT: movq -96(%rsp,%rsi), %r10 |
| ; SSE-NEXT: shrdq %cl, %r10, %r9 |
| ; SSE-NEXT: movq -88(%rsp,%rsi), %r11 |
| ; SSE-NEXT: shrdq %cl, %r11, %r10 |
| ; SSE-NEXT: movq -80(%rsp,%rsi), %rbx |
| ; SSE-NEXT: shrdq %cl, %rbx, %r11 |
| ; SSE-NEXT: movq -72(%rsp,%rsi), %r14 |
| ; SSE-NEXT: shrdq %cl, %r14, %rbx |
| ; SSE-NEXT: movq -128(%rsp,%rsi), %rsi |
| ; SSE-NEXT: shrdq %cl, %rax, %rsi |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: shrq %cl, %r14 |
| ; SSE-NEXT: movq %r14, 56(%rdi) |
| ; SSE-NEXT: movq %rbx, 48(%rdi) |
| ; SSE-NEXT: movq %r11, 40(%rdi) |
| ; SSE-NEXT: movq %r10, 32(%rdi) |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: addq $8, %rsp |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_signbit_i512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [0,0,0,9223372036854775808] |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %esi, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %esi |
| ; AVX2-NEXT: andl $56, %esi |
| ; AVX2-NEXT: movq -112(%rsp,%rsi), %rdx |
| ; AVX2-NEXT: movq -120(%rsp,%rsi), %rax |
| ; AVX2-NEXT: movq %rax, %r8 |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -104(%rsp,%rsi), %r9 |
| ; AVX2-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX2-NEXT: movq -96(%rsp,%rsi), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %r9 |
| ; AVX2-NEXT: movq -88(%rsp,%rsi), %r11 |
| ; AVX2-NEXT: shrdq %cl, %r11, %r10 |
| ; AVX2-NEXT: movq -80(%rsp,%rsi), %rbx |
| ; AVX2-NEXT: shrdq %cl, %rbx, %r11 |
| ; AVX2-NEXT: movq -128(%rsp,%rsi), %r14 |
| ; AVX2-NEXT: movq -72(%rsp,%rsi), %rsi |
| ; AVX2-NEXT: shrdq %cl, %rsi, %rbx |
| ; AVX2-NEXT: shrdq %cl, %rax, %r14 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrxq %rcx, %rsi, %rcx |
| ; AVX2-NEXT: movq %rcx, 56(%rdi) |
| ; AVX2-NEXT: movq %rbx, 48(%rdi) |
| ; AVX2-NEXT: movq %r11, 40(%rdi) |
| ; AVX2-NEXT: movq %r10, 32(%rdi) |
| ; AVX2-NEXT: movq %r9, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r14, (%rdi) |
| ; AVX2-NEXT: addq $8, %rsp |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_signbit_i512: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 |
| ; AVX512F-NEXT: shrxq %rsi, %rcx, %rcx |
| ; AVX512F-NEXT: shrl $6, %esi |
| ; AVX512F-NEXT: movl $128, %edx |
| ; AVX512F-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512F-NEXT: kmovw %edx, %k1 |
| ; AVX512F-NEXT: vpbroadcastq %rcx, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_signbit_i512: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 |
| ; AVX512VL-NEXT: shrxq %rsi, %rcx, %rcx |
| ; AVX512VL-NEXT: shrl $6, %esi |
| ; AVX512VL-NEXT: movl $128, %edx |
| ; AVX512VL-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512VL-NEXT: kmovd %edx, %k1 |
| ; AVX512VL-NEXT: vpbroadcastq %rcx, %zmm0 {%k1} {z} |
| ; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_signbit_i512: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 |
| ; AVX512VBMI-NEXT: shrxq %rsi, %rcx, %rcx |
| ; AVX512VBMI-NEXT: shrl $6, %esi |
| ; AVX512VBMI-NEXT: movl $128, %edx |
| ; AVX512VBMI-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512VBMI-NEXT: kmovd %edx, %k1 |
| ; AVX512VBMI-NEXT: vpbroadcastq %rcx, %zmm0 {%k1} {z} |
| ; AVX512VBMI-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %s = shl i512 1, 511 |
| %r = lshr i512 %s, %a0 |
| ret i512 %r |
| } |
| |
| ; Arithmetic right shift of (1 << 511): the shifted-in bits replicate the set |
| ; sign bit. SSE stores 0x8000000000000000 plus all-ones fill limbs to the |
| ; stack and runs the generic shrdq sequence (top limb finished with sarq). |
| ; AVX512 rewrites the shift as fills driven by rcx = 511 - amt: limbs above |
| ; the boundary become all-ones under mask k2 = -1<<(rcx>>6), then the |
| ; boundary limb -1<<(rcx&63) is merged in under the one-hot mask |
| ; k1 = 1<<(rcx>>6) via a masked vpbroadcastq. |
| define i512 @ashr_signbit_i512(i512 %a0) nounwind { |
| ; SSE-LABEL: ashr_signbit_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: andl $56, %esi |
| ; SSE-NEXT: movq -112(%rsp,%rsi), %rdx |
| ; SSE-NEXT: movq -120(%rsp,%rsi), %rax |
| ; SSE-NEXT: movq %rax, %r8 |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -104(%rsp,%rsi), %r9 |
| ; SSE-NEXT: shrdq %cl, %r9, %rdx |
| ; SSE-NEXT: movq -96(%rsp,%rsi), %r10 |
| ; SSE-NEXT: shrdq %cl, %r10, %r9 |
| ; SSE-NEXT: movq -88(%rsp,%rsi), %r11 |
| ; SSE-NEXT: shrdq %cl, %r11, %r10 |
| ; SSE-NEXT: movq -80(%rsp,%rsi), %rbx |
| ; SSE-NEXT: shrdq %cl, %rbx, %r11 |
| ; SSE-NEXT: movq -72(%rsp,%rsi), %r14 |
| ; SSE-NEXT: shrdq %cl, %r14, %rbx |
| ; SSE-NEXT: movq -128(%rsp,%rsi), %rsi |
| ; SSE-NEXT: shrdq %cl, %rax, %rsi |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: sarq %cl, %r14 |
| ; SSE-NEXT: movq %r14, 56(%rdi) |
| ; SSE-NEXT: movq %rbx, 48(%rdi) |
| ; SSE-NEXT: movq %r11, 40(%rdi) |
| ; SSE-NEXT: movq %r10, 32(%rdi) |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: addq $8, %rsp |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_signbit_i512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808] |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %esi, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %esi |
| ; AVX2-NEXT: andl $56, %esi |
| ; AVX2-NEXT: movq -112(%rsp,%rsi), %rdx |
| ; AVX2-NEXT: movq -120(%rsp,%rsi), %rax |
| ; AVX2-NEXT: movq %rax, %r8 |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -104(%rsp,%rsi), %r9 |
| ; AVX2-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX2-NEXT: movq -96(%rsp,%rsi), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %r9 |
| ; AVX2-NEXT: movq -88(%rsp,%rsi), %r11 |
| ; AVX2-NEXT: shrdq %cl, %r11, %r10 |
| ; AVX2-NEXT: movq -80(%rsp,%rsi), %rbx |
| ; AVX2-NEXT: shrdq %cl, %rbx, %r11 |
| ; AVX2-NEXT: movq -128(%rsp,%rsi), %r14 |
| ; AVX2-NEXT: movq -72(%rsp,%rsi), %rsi |
| ; AVX2-NEXT: shrdq %cl, %rsi, %rbx |
| ; AVX2-NEXT: shrdq %cl, %rax, %r14 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: sarxq %rcx, %rsi, %rcx |
| ; AVX2-NEXT: movq %rcx, 56(%rdi) |
| ; AVX2-NEXT: movq %rbx, 48(%rdi) |
| ; AVX2-NEXT: movq %r11, 40(%rdi) |
| ; AVX2-NEXT: movq %r10, 32(%rdi) |
| ; AVX2-NEXT: movq %r9, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r14, (%rdi) |
| ; AVX2-NEXT: addq $8, %rsp |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ashr_signbit_i512: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movl $511, %ecx # imm = 0x1FF |
| ; AVX512F-NEXT: subl %esi, %ecx |
| ; AVX512F-NEXT: movq $-1, %rdx |
| ; AVX512F-NEXT: shlxq %rcx, %rdx, %rsi |
| ; AVX512F-NEXT: shrl $6, %ecx |
| ; AVX512F-NEXT: movl $1, %edi |
| ; AVX512F-NEXT: shlxq %rcx, %rdi, %rdi |
| ; AVX512F-NEXT: kmovw %edi, %k1 |
| ; AVX512F-NEXT: shlxq %rcx, %rdx, %rcx |
| ; AVX512F-NEXT: kmovw %ecx, %k2 |
| ; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512F-NEXT: vpbroadcastq %rsi, %zmm0 {%k1} |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rax) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ashr_signbit_i512: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: movl $511, %ecx # imm = 0x1FF |
| ; AVX512VL-NEXT: subl %esi, %ecx |
| ; AVX512VL-NEXT: movq $-1, %rdx |
| ; AVX512VL-NEXT: shlxq %rcx, %rdx, %rsi |
| ; AVX512VL-NEXT: shrl $6, %ecx |
| ; AVX512VL-NEXT: movl $1, %edi |
| ; AVX512VL-NEXT: shlxq %rcx, %rdi, %rdi |
| ; AVX512VL-NEXT: kmovd %edi, %k1 |
| ; AVX512VL-NEXT: shlxq %rcx, %rdx, %rcx |
| ; AVX512VL-NEXT: kmovd %ecx, %k2 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512VL-NEXT: vpbroadcastq %rsi, %zmm0 {%k1} |
| ; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rax) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: ashr_signbit_i512: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: movl $511, %ecx # imm = 0x1FF |
| ; AVX512VBMI-NEXT: subl %esi, %ecx |
| ; AVX512VBMI-NEXT: movq $-1, %rdx |
| ; AVX512VBMI-NEXT: shlxq %rcx, %rdx, %rsi |
| ; AVX512VBMI-NEXT: shrl $6, %ecx |
| ; AVX512VBMI-NEXT: movl $1, %edi |
| ; AVX512VBMI-NEXT: shlxq %rcx, %rdi, %rdi |
| ; AVX512VBMI-NEXT: kmovd %edi, %k1 |
| ; AVX512VBMI-NEXT: shlxq %rcx, %rdx, %rcx |
| ; AVX512VBMI-NEXT: kmovd %ecx, %k2 |
| ; AVX512VBMI-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512VBMI-NEXT: vpbroadcastq %rsi, %zmm0 {%k1} |
| ; AVX512VBMI-NEXT: vmovdqu64 %zmm0, (%rax) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %s = shl i512 1, 511 |
| %r = ashr i512 %s, %a0 |
| ret i512 %r |
| } |
| |
| ; Left shift of all-ones by a variable amount. SSE/AVX2 spill eight -1 limbs |
| ; preceded by a zero guard region and run the generic shldq funnel-shift |
| ; over negatively-indexed stack slots. AVX512 composes the result from |
| ; masks: limbs at and above index amt>>6 are set to -1 under the mask |
| ; k2 = -1<<(amt>>6) (zero-masked vpternlogq), then the boundary limb is |
| ; overwritten with the partial value -1<<(amt&63) under the one-hot mask |
| ; k1 = 1<<(amt>>6) via a merge-masked vpbroadcastq. |
| define i512 @shl_allbits_i512(i512 %a0) nounwind { |
| ; SSE-LABEL: shl_allbits_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: andl $56, %esi |
| ; SSE-NEXT: negl %esi |
| ; SSE-NEXT: movslq %esi, %rax |
| ; SSE-NEXT: movq -56(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -48(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %rsi |
| ; SSE-NEXT: shldq %cl, %rdx, %rsi |
| ; SSE-NEXT: movq -40(%rsp,%rax), %r10 |
| ; SSE-NEXT: movq %r10, %r8 |
| ; SSE-NEXT: shldq %cl, %r9, %r8 |
| ; SSE-NEXT: movq -32(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %r11 |
| ; SSE-NEXT: shldq %cl, %r10, %r11 |
| ; SSE-NEXT: movq -24(%rsp,%rax), %r10 |
| ; SSE-NEXT: movq %r10, %rbx |
| ; SSE-NEXT: shldq %cl, %r9, %rbx |
| ; SSE-NEXT: movq -16(%rsp,%rax), %r9 |
| ; SSE-NEXT: movq %r9, %r14 |
| ; SSE-NEXT: shldq %cl, %r10, %r14 |
| ; SSE-NEXT: movq -8(%rsp,%rax), %r10 |
| ; SSE-NEXT: shldq %cl, %r9, %r10 |
| ; SSE-NEXT: movq -64(%rsp,%rax), %rax |
| ; SSE-NEXT: movq %rax, %r9 |
| ; SSE-NEXT: shlq %cl, %r9 |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: shldq %cl, %rax, %rdx |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: movq %r10, 56(%rdi) |
| ; SSE-NEXT: movq %r14, 48(%rdi) |
| ; SSE-NEXT: movq %rbx, 40(%rdi) |
| ; SSE-NEXT: movq %r11, 32(%rdi) |
| ; SSE-NEXT: movq %r8, 24(%rdi) |
| ; SSE-NEXT: movq %rsi, 16(%rdi) |
| ; SSE-NEXT: movq %rdx, 8(%rdi) |
| ; SSE-NEXT: movq %r9, (%rdi) |
| ; SSE-NEXT: addq $8, %rsp |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_allbits_i512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %esi, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %esi |
| ; AVX2-NEXT: andl $56, %esi |
| ; AVX2-NEXT: negl %esi |
| ; AVX2-NEXT: movslq %esi, %r8 |
| ; AVX2-NEXT: movq -56(%rsp,%r8), %rdx |
| ; AVX2-NEXT: movq -48(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %rsi |
| ; AVX2-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq -40(%rsp,%r8), %r10 |
| ; AVX2-NEXT: movq %r10, %r9 |
| ; AVX2-NEXT: shldq %cl, %rax, %r9 |
| ; AVX2-NEXT: movq -32(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %r11 |
| ; AVX2-NEXT: shldq %cl, %r10, %r11 |
| ; AVX2-NEXT: movq -24(%rsp,%r8), %r10 |
| ; AVX2-NEXT: movq %r10, %rbx |
| ; AVX2-NEXT: shldq %cl, %rax, %rbx |
| ; AVX2-NEXT: movq -16(%rsp,%r8), %rax |
| ; AVX2-NEXT: movq %rax, %r14 |
| ; AVX2-NEXT: shldq %cl, %r10, %r14 |
| ; AVX2-NEXT: movq -8(%rsp,%r8), %r10 |
| ; AVX2-NEXT: shldq %cl, %rax, %r10 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: movq -64(%rsp,%r8), %rdi |
| ; AVX2-NEXT: shlxq %rcx, %rdi, %r8 |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shldq %cl, %rdi, %rdx |
| ; AVX2-NEXT: movq %r10, 56(%rax) |
| ; AVX2-NEXT: movq %r14, 48(%rax) |
| ; AVX2-NEXT: movq %rbx, 40(%rax) |
| ; AVX2-NEXT: movq %r11, 32(%rax) |
| ; AVX2-NEXT: movq %r9, 24(%rax) |
| ; AVX2-NEXT: movq %rsi, 16(%rax) |
| ; AVX2-NEXT: movq %rdx, 8(%rax) |
| ; AVX2-NEXT: movq %r8, (%rax) |
| ; AVX2-NEXT: addq $8, %rsp |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: shl_allbits_i512: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movq $-1, %rcx |
| ; AVX512F-NEXT: shlxq %rsi, %rcx, %rdx |
| ; AVX512F-NEXT: shrl $6, %esi |
| ; AVX512F-NEXT: movl $1, %edi |
| ; AVX512F-NEXT: shlxq %rsi, %rdi, %rdi |
| ; AVX512F-NEXT: kmovw %edi, %k1 |
| ; AVX512F-NEXT: shlxq %rsi, %rcx, %rcx |
| ; AVX512F-NEXT: kmovw %ecx, %k2 |
| ; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512F-NEXT: vpbroadcastq %rdx, %zmm0 {%k1} |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rax) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: shl_allbits_i512: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: movq $-1, %rcx |
| ; AVX512VL-NEXT: shlxq %rsi, %rcx, %rdx |
| ; AVX512VL-NEXT: shrl $6, %esi |
| ; AVX512VL-NEXT: movl $1, %edi |
| ; AVX512VL-NEXT: shlxq %rsi, %rdi, %rdi |
| ; AVX512VL-NEXT: kmovd %edi, %k1 |
| ; AVX512VL-NEXT: shlxq %rsi, %rcx, %rcx |
| ; AVX512VL-NEXT: kmovd %ecx, %k2 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512VL-NEXT: vpbroadcastq %rdx, %zmm0 {%k1} |
| ; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rax) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: shl_allbits_i512: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: movq $-1, %rcx |
| ; AVX512VBMI-NEXT: shlxq %rsi, %rcx, %rdx |
| ; AVX512VBMI-NEXT: shrl $6, %esi |
| ; AVX512VBMI-NEXT: movl $1, %edi |
| ; AVX512VBMI-NEXT: shlxq %rsi, %rdi, %rdi |
| ; AVX512VBMI-NEXT: kmovd %edi, %k1 |
| ; AVX512VBMI-NEXT: shlxq %rsi, %rcx, %rcx |
| ; AVX512VBMI-NEXT: kmovd %ecx, %k2 |
| ; AVX512VBMI-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512VBMI-NEXT: vpbroadcastq %rdx, %zmm0 {%k1} |
| ; AVX512VBMI-NEXT: vmovdqu64 %zmm0, (%rax) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %r = shl i512 -1, %a0 |
| ret i512 %r |
| } |
| |
| ; Logical right shift of all-ones by a variable amount. SSE spills eight -1 |
| ; limbs followed by a zero guard region (limb-by-limb movq $0/$-1 stores) |
| ; and runs the generic shrdq funnel-shift; AVX2 does the same with ymm |
| ; stores. AVX512 mirrors shl_allbits_i512 with right shifts: limbs below |
| ; the boundary are set to -1 under mask k2 = 255>>(amt>>6), then the |
| ; boundary limb -1>>(amt&63) is merged in under the one-hot mask |
| ; k1 = 128>>(amt>>6) via a merge-masked vpbroadcastq. |
| define i512 @lshr_allbits_i512(i512 %a0) nounwind { |
| ; SSE-LABEL: lshr_allbits_i512: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %r14 |
| ; SSE-NEXT: pushq %rbx |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %esi, %ecx |
| ; SSE-NEXT: andl $63, %ecx |
| ; SSE-NEXT: shrl $3, %esi |
| ; SSE-NEXT: andl $56, %esi |
| ; SSE-NEXT: movq -112(%rsp,%rsi), %rdx |
| ; SSE-NEXT: movq -120(%rsp,%rsi), %rax |
| ; SSE-NEXT: movq %rax, %r8 |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -104(%rsp,%rsi), %r9 |
| ; SSE-NEXT: shrdq %cl, %r9, %rdx |
| ; SSE-NEXT: movq -96(%rsp,%rsi), %r10 |
| ; SSE-NEXT: shrdq %cl, %r10, %r9 |
| ; SSE-NEXT: movq -88(%rsp,%rsi), %r11 |
| ; SSE-NEXT: shrdq %cl, %r11, %r10 |
| ; SSE-NEXT: movq -80(%rsp,%rsi), %rbx |
| ; SSE-NEXT: shrdq %cl, %rbx, %r11 |
| ; SSE-NEXT: movq -72(%rsp,%rsi), %r14 |
| ; SSE-NEXT: shrdq %cl, %r14, %rbx |
| ; SSE-NEXT: movq -128(%rsp,%rsi), %rsi |
| ; SSE-NEXT: shrdq %cl, %rax, %rsi |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; SSE-NEXT: shrq %cl, %r14 |
| ; SSE-NEXT: movq %r14, 56(%rdi) |
| ; SSE-NEXT: movq %rbx, 48(%rdi) |
| ; SSE-NEXT: movq %r11, 40(%rdi) |
| ; SSE-NEXT: movq %r10, 32(%rdi) |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: addq $8, %rsp |
| ; SSE-NEXT: popq %rbx |
| ; SSE-NEXT: popq %r14 |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_allbits_i512: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %r14 |
| ; AVX2-NEXT: pushq %rbx |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %esi, %ecx |
| ; AVX2-NEXT: andl $63, %ecx |
| ; AVX2-NEXT: shrl $3, %esi |
| ; AVX2-NEXT: andl $56, %esi |
| ; AVX2-NEXT: movq -112(%rsp,%rsi), %rdx |
| ; AVX2-NEXT: movq -120(%rsp,%rsi), %rax |
| ; AVX2-NEXT: movq %rax, %r8 |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -104(%rsp,%rsi), %r9 |
| ; AVX2-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX2-NEXT: movq -96(%rsp,%rsi), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %r9 |
| ; AVX2-NEXT: movq -88(%rsp,%rsi), %r11 |
| ; AVX2-NEXT: shrdq %cl, %r11, %r10 |
| ; AVX2-NEXT: movq -80(%rsp,%rsi), %rbx |
| ; AVX2-NEXT: shrdq %cl, %rbx, %r11 |
| ; AVX2-NEXT: movq -128(%rsp,%rsi), %r14 |
| ; AVX2-NEXT: movq -72(%rsp,%rsi), %rsi |
| ; AVX2-NEXT: shrdq %cl, %rsi, %rbx |
| ; AVX2-NEXT: shrdq %cl, %rax, %r14 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrxq %rcx, %rsi, %rcx |
| ; AVX2-NEXT: movq %rcx, 56(%rdi) |
| ; AVX2-NEXT: movq %rbx, 48(%rdi) |
| ; AVX2-NEXT: movq %r11, 40(%rdi) |
| ; AVX2-NEXT: movq %r10, 32(%rdi) |
| ; AVX2-NEXT: movq %r9, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r14, (%rdi) |
| ; AVX2-NEXT: addq $8, %rsp |
| ; AVX2-NEXT: popq %rbx |
| ; AVX2-NEXT: popq %r14 |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_allbits_i512: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movq $-1, %rcx |
| ; AVX512F-NEXT: shrxq %rsi, %rcx, %rcx |
| ; AVX512F-NEXT: shrl $6, %esi |
| ; AVX512F-NEXT: movl $128, %edx |
| ; AVX512F-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512F-NEXT: kmovw %edx, %k1 |
| ; AVX512F-NEXT: movl $255, %edx |
| ; AVX512F-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512F-NEXT: kmovw %edx, %k2 |
| ; AVX512F-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512F-NEXT: vpbroadcastq %rcx, %zmm0 {%k1} |
| ; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_allbits_i512: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: movq $-1, %rcx |
| ; AVX512VL-NEXT: shrxq %rsi, %rcx, %rcx |
| ; AVX512VL-NEXT: shrl $6, %esi |
| ; AVX512VL-NEXT: movl $128, %edx |
| ; AVX512VL-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512VL-NEXT: kmovd %edx, %k1 |
| ; AVX512VL-NEXT: movl $255, %edx |
| ; AVX512VL-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512VL-NEXT: kmovd %edx, %k2 |
| ; AVX512VL-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512VL-NEXT: vpbroadcastq %rcx, %zmm0 {%k1} |
| ; AVX512VL-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_allbits_i512: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: movq $-1, %rcx |
| ; AVX512VBMI-NEXT: shrxq %rsi, %rcx, %rcx |
| ; AVX512VBMI-NEXT: shrl $6, %esi |
| ; AVX512VBMI-NEXT: movl $128, %edx |
| ; AVX512VBMI-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512VBMI-NEXT: kmovd %edx, %k1 |
| ; AVX512VBMI-NEXT: movl $255, %edx |
| ; AVX512VBMI-NEXT: shrxq %rsi, %rdx, %rdx |
| ; AVX512VBMI-NEXT: kmovd %edx, %k2 |
| ; AVX512VBMI-NEXT: vpternlogq {{.*#+}} zmm0 {%k2} {z} = -1 |
| ; AVX512VBMI-NEXT: vpbroadcastq %rcx, %zmm0 {%k1} |
| ; AVX512VBMI-NEXT: vmovdqu64 %zmm0, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %r = lshr i512 -1, %a0 |
| ret i512 %r |
| } |
| |
| ; Variable i512 lshr truncated to i64: only the low result limb is needed, so |
| ; every target spills the 512-bit value plus a zero guard region to the |
| ; stack, indexes the two limbs straddling the result at (amt>>3)&56, and |
| ; combines them with a single shrdq by the full shift amount in %cl (shrdq |
| ; only consumes the low 6 bits). The prefixes differ only in how the zero |
| ; fill is stored: SSE uses four xmm stores, AVX2/AVX512VL/AVX512VBMI two |
| ; ymm stores, AVX512F one zmm store. |
| define i64 @lshr_extract_i512_i64(i512 %a0, i512 %a1) nounwind { |
| ; SSE-LABEL: lshr_extract_i512_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; SSE-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 |
| ; SSE-NEXT: xorps %xmm1, %xmm1 |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %r10d, %ecx |
| ; SSE-NEXT: shrl $3, %ecx |
| ; SSE-NEXT: andl $56, %ecx |
| ; SSE-NEXT: movq -128(%rsp,%rcx), %rax |
| ; SSE-NEXT: movq -120(%rsp,%rcx), %rdx |
| ; SSE-NEXT: movl %r10d, %ecx |
| ; SSE-NEXT: shrdq %cl, %rdx, %rax |
| ; SSE-NEXT: popq %rcx |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_extract_i512_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %r10d, %ecx |
| ; AVX2-NEXT: shrl $3, %ecx |
| ; AVX2-NEXT: andl $56, %ecx |
| ; AVX2-NEXT: movq -128(%rsp,%rcx), %rax |
| ; AVX2-NEXT: movq -120(%rsp,%rcx), %rdx |
| ; AVX2-NEXT: movl %r10d, %ecx |
| ; AVX2-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX2-NEXT: popq %rcx |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_extract_i512_i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: pushq %rax |
| ; AVX512F-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512F-NEXT: vmovups %zmm1, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %r10d, %ecx |
| ; AVX512F-NEXT: shrl $3, %ecx |
| ; AVX512F-NEXT: andl $56, %ecx |
| ; AVX512F-NEXT: movq -128(%rsp,%rcx), %rax |
| ; AVX512F-NEXT: movq -120(%rsp,%rcx), %rdx |
| ; AVX512F-NEXT: movl %r10d, %ecx |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512F-NEXT: popq %rcx |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_extract_i512_i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: pushq %rax |
| ; AVX512VL-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %r10d, %ecx |
| ; AVX512VL-NEXT: shrl $3, %ecx |
| ; AVX512VL-NEXT: andl $56, %ecx |
| ; AVX512VL-NEXT: movq -128(%rsp,%rcx), %rax |
| ; AVX512VL-NEXT: movq -120(%rsp,%rcx), %rdx |
| ; AVX512VL-NEXT: movl %r10d, %ecx |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VL-NEXT: popq %rcx |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_extract_i512_i64: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: pushq %rax |
| ; AVX512VBMI-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; AVX512VBMI-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 |
| ; AVX512VBMI-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %r10d, %ecx |
| ; AVX512VBMI-NEXT: shrl $3, %ecx |
| ; AVX512VBMI-NEXT: andl $56, %ecx |
| ; AVX512VBMI-NEXT: movq -128(%rsp,%rcx), %rax |
| ; AVX512VBMI-NEXT: movq -120(%rsp,%rcx), %rdx |
| ; AVX512VBMI-NEXT: movl %r10d, %ecx |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VBMI-NEXT: popq %rcx |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %b = lshr i512 %a0, %a1 |
| %r = trunc i512 %b to i64 |
| ret i64 %r |
| } |
| |
| ; Truncating a variable ashr of an i512 to i64. All run lines produce identical |
| ; scalar code (merged CHECK prefix): spill the eight 64-bit limbs to the stack, |
| ; store eight copies of the sign word (sarq $63 of the top limb) above them, then |
| ; load the limb pair at byte offset ((amt >> 3) & 56) and combine the sub-64-bit |
| ; remainder of the shift with shrdq (funnel shift right). |
| define i64 @ashr_extract_i512_i64(i512 %a0, i512 %a1) nounwind { |
| ; CHECK-LABEL: ashr_extract_i512_i64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: pushq %rax |
| ; CHECK-NEXT: movq %rcx, %rax |
| ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx |
| ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r10 |
| ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r11 |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r10, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r9, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: sarq $63, %r11 |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %r11, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movl %ecx, %edx |
| ; CHECK-NEXT: shrl $3, %edx |
| ; CHECK-NEXT: andl $56, %edx |
| ; CHECK-NEXT: movq -128(%rsp,%rdx), %rax |
| ; CHECK-NEXT: movq -120(%rsp,%rdx), %rdx |
| ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; CHECK-NEXT: shrdq %cl, %rdx, %rax |
| ; CHECK-NEXT: popq %rcx |
| ; CHECK-NEXT: retq |
| %b = ashr i512 %a0, %a1 |
| %r = trunc i512 %b to i64 |
| ret i64 %r |
| } |
| |
| ; Same bottom-64-bit extract, but the i512 is loaded from memory. SSE/AVX2 still |
| ; go through the stack, zero-padding the 64 bytes above the value before indexing |
| ; by (amt >> 3) & 56 and finishing with shrdq. The AVX512 targets stay in |
| ; registers: they build a k-mask of (-1 << (amt/64)) and use VPCOMPRESSQ to shift |
| ; whole limbs down (zero-masked), leaving only the sub-64-bit remainder for the |
| ; scalar shrdq on the low two extracted qwords. |
| define i64 @lshr_extract_load_i512_i64(ptr %p0, i512 %a1) nounwind { |
| ; SSE-LABEL: lshr_extract_load_i512_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: movaps (%rdi), %xmm0 |
| ; SSE-NEXT: movaps 16(%rdi), %xmm1 |
| ; SSE-NEXT: movaps 32(%rdi), %xmm2 |
| ; SSE-NEXT: movaps 48(%rdi), %xmm3 |
| ; SSE-NEXT: xorps %xmm4, %xmm4 |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %edx |
| ; SSE-NEXT: shrl $3, %edx |
| ; SSE-NEXT: andl $56, %edx |
| ; SSE-NEXT: movq -128(%rsp,%rdx), %rax |
| ; SSE-NEXT: movq -120(%rsp,%rdx), %rdx |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shrdq %cl, %rdx, %rax |
| ; SSE-NEXT: popq %rcx |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_extract_load_i512_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vmovups (%rdi), %ymm0 |
| ; AVX2-NEXT: vmovups 32(%rdi), %ymm1 |
| ; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $3, %edx |
| ; AVX2-NEXT: andl $56, %edx |
| ; AVX2-NEXT: movq -128(%rsp,%rdx), %rax |
| ; AVX2-NEXT: movq -120(%rsp,%rdx), %rdx |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX2-NEXT: popq %rcx |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_extract_load_i512_i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rsi, %rcx |
| ; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrl $6, %eax |
| ; AVX512F-NEXT: movl $-1, %edx |
| ; AVX512F-NEXT: shlxl %eax, %edx, %eax |
| ; AVX512F-NEXT: kmovw %eax, %k1 |
| ; AVX512F-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx |
| ; AVX512F-NEXT: vmovq %xmm0, %rax |
| ; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_extract_load_i512_i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rsi, %rcx |
| ; AVX512VL-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrl $6, %eax |
| ; AVX512VL-NEXT: movl $-1, %edx |
| ; AVX512VL-NEXT: shlxl %eax, %edx, %eax |
| ; AVX512VL-NEXT: kmovd %eax, %k1 |
| ; AVX512VL-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} |
| ; AVX512VL-NEXT: vpextrq $1, %xmm0, %rdx |
| ; AVX512VL-NEXT: vmovq %xmm0, %rax |
| ; AVX512VL-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_extract_load_i512_i64: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rsi, %rcx |
| ; AVX512VBMI-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrl $6, %eax |
| ; AVX512VBMI-NEXT: movl $-1, %edx |
| ; AVX512VBMI-NEXT: shlxl %eax, %edx, %eax |
| ; AVX512VBMI-NEXT: kmovd %eax, %k1 |
| ; AVX512VBMI-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} |
| ; AVX512VBMI-NEXT: vpextrq $1, %xmm0, %rdx |
| ; AVX512VBMI-NEXT: vmovq %xmm0, %rax |
| ; AVX512VBMI-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %a0 = load i512, ptr %p0 |
| %b = lshr i512 %a0, %a1 |
| %r = trunc i512 %b to i64 |
| ret i64 %r |
| } |
| |
| ; Arithmetic-shift variant of the loaded extract. SSE/AVX2 load the top qword |
| ; separately, sarq $63 it, and store eight sign copies above the spilled value |
| ; before the stack-indexed shrdq. AVX512 computes the sign fill in-register: |
| ; vpsraq $63 then vpermq broadcasts limb 7's sign word across zmm1, which serves |
| ; as the merge destination for the masked VPCOMPRESSQ limb shift. |
| define i64 @ashr_extract_load_i512_i64(ptr %p0, i512 %a1) nounwind { |
| ; SSE-LABEL: ashr_extract_load_i512_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: movaps (%rdi), %xmm0 |
| ; SSE-NEXT: movaps 16(%rdi), %xmm1 |
| ; SSE-NEXT: movaps 32(%rdi), %xmm2 |
| ; SSE-NEXT: movq 48(%rdi), %rax |
| ; SSE-NEXT: movq 56(%rdi), %rdx |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: sarq $63, %rdx |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %edx |
| ; SSE-NEXT: shrl $3, %edx |
| ; SSE-NEXT: andl $56, %edx |
| ; SSE-NEXT: movq -128(%rsp,%rdx), %rax |
| ; SSE-NEXT: movq -120(%rsp,%rdx), %rdx |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shrdq %cl, %rdx, %rax |
| ; SSE-NEXT: popq %rcx |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_extract_load_i512_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vmovups (%rdi), %ymm0 |
| ; AVX2-NEXT: vmovaps 32(%rdi), %xmm1 |
| ; AVX2-NEXT: movq 48(%rdi), %rax |
| ; AVX2-NEXT: movq 56(%rdi), %rdx |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: sarq $63, %rdx |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %edx |
| ; AVX2-NEXT: shrl $3, %edx |
| ; AVX2-NEXT: andl $56, %edx |
| ; AVX2-NEXT: movq -128(%rsp,%rdx), %rax |
| ; AVX2-NEXT: movq -120(%rsp,%rdx), %rdx |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX2-NEXT: popq %rcx |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ashr_extract_load_i512_i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rsi, %rcx |
| ; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512F-NEXT: vpsraq $63, %zmm0, %zmm1 |
| ; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7] |
| ; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1 |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrl $6, %eax |
| ; AVX512F-NEXT: movl $-1, %edx |
| ; AVX512F-NEXT: shlxl %eax, %edx, %eax |
| ; AVX512F-NEXT: kmovw %eax, %k1 |
| ; AVX512F-NEXT: vpcompressq %zmm0, %zmm1 {%k1} |
| ; AVX512F-NEXT: vpextrq $1, %xmm1, %rdx |
| ; AVX512F-NEXT: vmovq %xmm1, %rax |
| ; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ashr_extract_load_i512_i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512VL-NEXT: movq %rsi, %rcx |
| ; AVX512VL-NEXT: vpsraq $63, %zmm0, %zmm1 |
| ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7] |
| ; AVX512VL-NEXT: vpermq %zmm1, %zmm2, %zmm1 |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrl $6, %eax |
| ; AVX512VL-NEXT: movl $-1, %edx |
| ; AVX512VL-NEXT: shlxl %eax, %edx, %eax |
| ; AVX512VL-NEXT: kmovd %eax, %k1 |
| ; AVX512VL-NEXT: vpcompressq %zmm0, %zmm1 {%k1} |
| ; AVX512VL-NEXT: vpextrq $1, %xmm1, %rdx |
| ; AVX512VL-NEXT: vmovq %xmm1, %rax |
| ; AVX512VL-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: ashr_extract_load_i512_i64: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512VBMI-NEXT: movq %rsi, %rcx |
| ; AVX512VBMI-NEXT: vpsraq $63, %zmm0, %zmm1 |
| ; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7] |
| ; AVX512VBMI-NEXT: vpermq %zmm1, %zmm2, %zmm1 |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrl $6, %eax |
| ; AVX512VBMI-NEXT: movl $-1, %edx |
| ; AVX512VBMI-NEXT: shlxl %eax, %edx, %eax |
| ; AVX512VBMI-NEXT: kmovd %eax, %k1 |
| ; AVX512VBMI-NEXT: vpcompressq %zmm0, %zmm1 {%k1} |
| ; AVX512VBMI-NEXT: vpextrq $1, %xmm1, %rdx |
| ; AVX512VBMI-NEXT: vmovq %xmm1, %rax |
| ; AVX512VBMI-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %a0 = load i512, ptr %p0 |
| %b = ashr i512 %a0, %a1 |
| %r = trunc i512 %b to i64 |
| ret i64 %r |
| } |
| |
| ; The shift amount is a multiple of 64 (%a1 * 64), so the extract is a pure limb |
| ; select with no shrdq remainder. SSE/AVX2 spill with zero padding and index the |
| ; stack slot directly at -128(%rsp,%rsi,8) with the index masked to 0..7. AVX512 |
| ; does it entirely in-register: VPCOMPRESSQ under a (-1 << idx) k-mask moves the |
| ; selected limb to element 0, then vmovq extracts it. |
| define i64 @lshr_extract_idx_load_i512_i64(ptr %p0, i512 %a1) nounwind { |
| ; SSE-LABEL: lshr_extract_idx_load_i512_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movaps (%rdi), %xmm0 |
| ; SSE-NEXT: movaps 16(%rdi), %xmm1 |
| ; SSE-NEXT: movaps 32(%rdi), %xmm2 |
| ; SSE-NEXT: movaps 48(%rdi), %xmm3 |
| ; SSE-NEXT: xorps %xmm4, %xmm4 |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm4, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm3, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: andl $7, %esi |
| ; SSE-NEXT: movq -128(%rsp,%rsi,8), %rax |
| ; SSE-NEXT: popq %rcx |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_extract_idx_load_i512_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: vmovups (%rdi), %ymm0 |
| ; AVX2-NEXT: vmovups 32(%rdi), %ymm1 |
| ; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2 |
| ; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm2, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: andl $7, %esi |
| ; AVX2-NEXT: movq -128(%rsp,%rsi,8), %rax |
| ; AVX2-NEXT: popq %rcx |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_extract_idx_load_i512_i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512F-NEXT: movl $-1, %eax |
| ; AVX512F-NEXT: shlxl %esi, %eax, %eax |
| ; AVX512F-NEXT: kmovw %eax, %k1 |
| ; AVX512F-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} |
| ; AVX512F-NEXT: vmovq %xmm0, %rax |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_extract_idx_load_i512_i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512VL-NEXT: movl $-1, %eax |
| ; AVX512VL-NEXT: shlxl %esi, %eax, %eax |
| ; AVX512VL-NEXT: kmovd %eax, %k1 |
| ; AVX512VL-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} |
| ; AVX512VL-NEXT: vmovq %xmm0, %rax |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_extract_idx_load_i512_i64: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512VBMI-NEXT: movl $-1, %eax |
| ; AVX512VBMI-NEXT: shlxl %esi, %eax, %eax |
| ; AVX512VBMI-NEXT: kmovd %eax, %k1 |
| ; AVX512VBMI-NEXT: vpcompressq %zmm0, %zmm0 {%k1} {z} |
| ; AVX512VBMI-NEXT: vmovq %xmm0, %rax |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %a0 = load i512, ptr %p0 |
| %m1 = mul i512 %a1, 64 |
| %b = lshr i512 %a0, %m1 |
| %r = trunc i512 %b to i64 |
| ret i64 %r |
| } |
| |
| ; Limb-aligned (amt = idx * 64) arithmetic-shift extract. Like the lshr variant |
| ; above there is no shrdq remainder, but the padding must be sign fill instead of |
| ; zeros: SSE/AVX2 store eight copies of sarq $63 of the top qword above the spill |
| ; before the masked stack index; AVX512 uses vpsraq $63 + vpermq (broadcast of |
| ; limb 7's sign) as the merge destination for the masked VPCOMPRESSQ, then vmovq |
| ; extracts element 0. |
| define i64 @ashr_extract_idx_load_i512_i64(ptr %p0, i512 %a1) nounwind { |
| ; SSE-LABEL: ashr_extract_idx_load_i512_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: pushq %rax |
| ; SSE-NEXT: movaps (%rdi), %xmm0 |
| ; SSE-NEXT: movaps 16(%rdi), %xmm1 |
| ; SSE-NEXT: movaps 32(%rdi), %xmm2 |
| ; SSE-NEXT: movq 48(%rdi), %rax |
| ; SSE-NEXT: movq 56(%rdi), %rcx |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: sarq $63, %rcx |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: andl $7, %esi |
| ; SSE-NEXT: movq -128(%rsp,%rsi,8), %rax |
| ; SSE-NEXT: popq %rcx |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_extract_idx_load_i512_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: pushq %rax |
| ; AVX2-NEXT: vmovups (%rdi), %ymm0 |
| ; AVX2-NEXT: vmovaps 32(%rdi), %xmm1 |
| ; AVX2-NEXT: movq 48(%rdi), %rax |
| ; AVX2-NEXT: movq 56(%rdi), %rcx |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: sarq $63, %rcx |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: andl $7, %esi |
| ; AVX2-NEXT: movq -128(%rsp,%rsi,8), %rax |
| ; AVX2-NEXT: popq %rcx |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ashr_extract_idx_load_i512_i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512F-NEXT: vpsraq $63, %zmm0, %zmm1 |
| ; AVX512F-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7] |
| ; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1 |
| ; AVX512F-NEXT: movl $-1, %eax |
| ; AVX512F-NEXT: shlxl %esi, %eax, %eax |
| ; AVX512F-NEXT: kmovw %eax, %k1 |
| ; AVX512F-NEXT: vpcompressq %zmm0, %zmm1 {%k1} |
| ; AVX512F-NEXT: vmovq %xmm1, %rax |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ashr_extract_idx_load_i512_i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512VL-NEXT: vpsraq $63, %zmm0, %zmm1 |
| ; AVX512VL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7] |
| ; AVX512VL-NEXT: vpermq %zmm1, %zmm2, %zmm1 |
| ; AVX512VL-NEXT: movl $-1, %eax |
| ; AVX512VL-NEXT: shlxl %esi, %eax, %eax |
| ; AVX512VL-NEXT: kmovd %eax, %k1 |
| ; AVX512VL-NEXT: vpcompressq %zmm0, %zmm1 {%k1} |
| ; AVX512VL-NEXT: vmovq %xmm1, %rax |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: ashr_extract_idx_load_i512_i64: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: vmovdqu64 (%rdi), %zmm0 |
| ; AVX512VBMI-NEXT: vpsraq $63, %zmm0, %zmm1 |
| ; AVX512VBMI-NEXT: vpbroadcastq {{.*#+}} zmm2 = [7,7,7,7,7,7,7,7] |
| ; AVX512VBMI-NEXT: vpermq %zmm1, %zmm2, %zmm1 |
| ; AVX512VBMI-NEXT: movl $-1, %eax |
| ; AVX512VBMI-NEXT: shlxl %esi, %eax, %eax |
| ; AVX512VBMI-NEXT: kmovd %eax, %k1 |
| ; AVX512VBMI-NEXT: vpcompressq %zmm0, %zmm1 {%k1} |
| ; AVX512VBMI-NEXT: vmovq %xmm1, %rax |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| %a0 = load i512, ptr %p0 |
| %m1 = mul i512 %a1, 64 |
| %b = ashr i512 %a0, %m1 |
| %r = trunc i512 %b to i64 |
| ret i64 %r |
| } |