| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s -check-prefixes=CHECK,SSE |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s -check-prefixes=CHECK,SSE |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s -check-prefixes=CHECK,AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=knl | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512F |
| ; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512VL |
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 -mattr=+avx512vbmi2 | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512VBMI
| ; RUN: llc < %s -mtriple=i686-- | FileCheck %s -check-prefixes=X86 |
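
; Exercise the lowering of 256-bit shl/lshr/ashr: variable shift counts,
; constant-1 shift counts, and shifts of values loaded from memory, across the
; x86-64 microarchitecture levels and on i686.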
| |
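; Variable-count shl: the value is spilled to the stack with 32 bytes of zero
; padding below it, 64-bit limbs are reloaded at a negated offset derived from
; bits [7:6] of the shift amount, and adjacent limbs are merged with SHLD by
; the remaining count.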
| define i256 @shl_i256(i256 %a0, i256 %a1) nounwind { |
| ; SSE-LABEL: shl_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %r9d, %eax |
| ; SSE-NEXT: shrb $3, %al |
| ; SSE-NEXT: andb $24, %al |
| ; SSE-NEXT: negb %al |
| ; SSE-NEXT: movsbq %al, %rax |
| ; SSE-NEXT: movq -32(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -24(%rsp,%rax), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: movl %r9d, %ecx |
| ; SSE-NEXT: shldq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -16(%rsp,%rax), %r10 |
| ; SSE-NEXT: shldq %cl, %rsi, %r10 |
| ; SSE-NEXT: movq -40(%rsp,%rax), %rax |
| ; SSE-NEXT: movq %rax, %rsi |
| ; SSE-NEXT: shlq %cl, %rsi |
| ; SSE-NEXT: shldq %cl, %rax, %rdx |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: movq %r10, 24(%rdi) |
| ; SSE-NEXT: movq %r8, 16(%rdi) |
| ; SSE-NEXT: movq %rdx, 8(%rdi) |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_i256: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %r9d, %eax |
| ; AVX2-NEXT: shrb $3, %al |
| ; AVX2-NEXT: andb $24, %al |
| ; AVX2-NEXT: negb %al |
| ; AVX2-NEXT: movsbq %al, %rdx |
| ; AVX2-NEXT: movq -32(%rsp,%rdx), %rsi |
| ; AVX2-NEXT: movq -24(%rsp,%rdx), %rax |
| ; AVX2-NEXT: movq %rax, %r8 |
| ; AVX2-NEXT: movl %r9d, %ecx |
| ; AVX2-NEXT: shldq %cl, %rsi, %r8 |
| ; AVX2-NEXT: movq -16(%rsp,%rdx), %r10 |
| ; AVX2-NEXT: shldq %cl, %rax, %r10 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: movq -40(%rsp,%rdx), %rdx |
| ; AVX2-NEXT: shlxq %r9, %rdx, %rdi |
| ; AVX2-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq %r10, 24(%rax) |
| ; AVX2-NEXT: movq %r8, 16(%rax) |
| ; AVX2-NEXT: movq %rsi, 8(%rax) |
| ; AVX2-NEXT: movq %rdi, (%rax) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: shl_i256: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %r9d, %eax |
| ; AVX512F-NEXT: shrb $3, %al |
| ; AVX512F-NEXT: andb $24, %al |
| ; AVX512F-NEXT: negb %al |
| ; AVX512F-NEXT: movsbq %al, %rdx |
| ; AVX512F-NEXT: movq -32(%rsp,%rdx), %rsi |
| ; AVX512F-NEXT: movq -24(%rsp,%rdx), %rax |
| ; AVX512F-NEXT: movq %rax, %r8 |
| ; AVX512F-NEXT: movl %r9d, %ecx |
| ; AVX512F-NEXT: shldq %cl, %rsi, %r8 |
| ; AVX512F-NEXT: movq -16(%rsp,%rdx), %r10 |
| ; AVX512F-NEXT: shldq %cl, %rax, %r10 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movq -40(%rsp,%rdx), %rdx |
| ; AVX512F-NEXT: shlxq %r9, %rdx, %rdi |
| ; AVX512F-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX512F-NEXT: movq %r10, 24(%rax) |
| ; AVX512F-NEXT: movq %r8, 16(%rax) |
| ; AVX512F-NEXT: movq %rsi, 8(%rax) |
| ; AVX512F-NEXT: movq %rdi, (%rax) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: shl_i256: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %r9d, %eax |
| ; AVX512VL-NEXT: shrb $3, %al |
| ; AVX512VL-NEXT: andb $24, %al |
| ; AVX512VL-NEXT: negb %al |
| ; AVX512VL-NEXT: movsbq %al, %rax |
| ; AVX512VL-NEXT: movq -32(%rsp,%rax), %rdx |
| ; AVX512VL-NEXT: movq -24(%rsp,%rax), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: movl %r9d, %ecx |
| ; AVX512VL-NEXT: shldq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -16(%rsp,%rax), %r10 |
| ; AVX512VL-NEXT: shldq %cl, %rsi, %r10 |
| ; AVX512VL-NEXT: movq -40(%rsp,%rax), %rsi |
| ; AVX512VL-NEXT: shldq %cl, %rsi, %rdx |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: shlxq %r9, %rsi, %rcx |
| ; AVX512VL-NEXT: movq %r10, 24(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 16(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 8(%rdi) |
| ; AVX512VL-NEXT: movq %rcx, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: shl_i256: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %r9d, %eax |
| ; AVX512VBMI-NEXT: shrb $3, %al |
| ; AVX512VBMI-NEXT: andb $24, %al |
| ; AVX512VBMI-NEXT: negb %al |
| ; AVX512VBMI-NEXT: movsbq %al, %rax |
| ; AVX512VBMI-NEXT: movq -32(%rsp,%rax), %rdx |
| ; AVX512VBMI-NEXT: movq -24(%rsp,%rax), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: movl %r9d, %ecx |
| ; AVX512VBMI-NEXT: shldq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -16(%rsp,%rax), %r10 |
| ; AVX512VBMI-NEXT: shldq %cl, %rsi, %r10 |
| ; AVX512VBMI-NEXT: movq -40(%rsp,%rax), %rsi |
| ; AVX512VBMI-NEXT: shldq %cl, %rsi, %rdx |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: shlxq %r9, %rsi, %rcx |
| ; AVX512VBMI-NEXT: movq %r10, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %rcx, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: shl_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 44(%ebp), %ecx |
| ; X86-NEXT: movl 12(%ebp), %eax |
| ; X86-NEXT: movl 16(%ebp), %edx |
| ; X86-NEXT: movl 20(%ebp), %esi |
| ; X86-NEXT: movl 40(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 36(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 32(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 28(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 24(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $3, %al |
| ; X86-NEXT: andb $28, %al |
| ; X86-NEXT: negb %al |
| ; X86-NEXT: movsbl %al, %eax |
| ; X86-NEXT: movl 68(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 72(%esp,%eax), %edx |
| ; X86-NEXT: movl %edx, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 76(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: shldl %cl, %edx, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 80(%esp,%eax), %edx |
| ; X86-NEXT: movl %edx, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 84(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, %ebx |
| ; X86-NEXT: shldl %cl, %edx, %ebx |
| ; X86-NEXT: movl 88(%esp,%eax), %edi |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shldl %cl, %esi, %edx |
| ; X86-NEXT: movl 64(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 92(%esp,%eax), %esi |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %esi, 28(%eax) |
| ; X86-NEXT: movl %edx, 24(%eax) |
| ; X86-NEXT: movl %ebx, 20(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shll %cl, %edx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: movl %esi, 4(%eax) |
| ; X86-NEXT: movl %edx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %r = shl i256 %a0, %a1 |
| ret i256 %r |
| } |
| |
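; Variable-count lshr: zeros are spilled above the value, limbs are reloaded at
; an offset of 8 * (count / 64), and the top limb is finished with a plain SHR
; (SHRX when BMI2 is available).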
| define i256 @lshr_i256(i256 %a0, i256 %a1) nounwind { |
| ; SSE-LABEL: lshr_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %r9d, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %eax |
| ; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: movl %r9d, %ecx |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; SSE-NEXT: shrdq %cl, %r10, %rdx |
| ; SSE-NEXT: movq -72(%rsp,%rax,8), %r11 |
| ; SSE-NEXT: shrdq %cl, %rsi, %r11 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: shrq %cl, %r10 |
| ; SSE-NEXT: movq %r10, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %r11, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_i256: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %r9d, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %eax |
| ; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX2-NEXT: movq %rsi, %r8 |
| ; AVX2-NEXT: movl %r9d, %ecx |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX2-NEXT: movq -48(%rsp,%rax,8), %r11 |
| ; AVX2-NEXT: shrdq %cl, %r11, %rdx |
| ; AVX2-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrxq %r9, %r11, %rcx |
| ; AVX2-NEXT: movq %rcx, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r10, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_i256: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %r9d, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %eax |
| ; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512F-NEXT: movq %rsi, %r8 |
| ; AVX512F-NEXT: movl %r9d, %ecx |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r11 |
| ; AVX512F-NEXT: shrdq %cl, %r11, %rdx |
| ; AVX512F-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: shrxq %r9, %r11, %rcx |
| ; AVX512F-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512F-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512F-NEXT: movq %r8, 8(%rdi) |
| ; AVX512F-NEXT: movq %r10, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_i256: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %r9d, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %eax |
| ; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: movl %r9d, %ecx |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512VL-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r11 |
| ; AVX512VL-NEXT: shrdq %cl, %rsi, %r11 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: shrxq %r9, %r10, %rcx |
| ; AVX512VL-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VL-NEXT: movq %r11, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_i256: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %r9d, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %eax |
| ; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: movl %r9d, %ecx |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512VBMI-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r11 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r11 |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: shrxq %r9, %r10, %rcx |
| ; AVX512VBMI-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %r11, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 44(%ebp), %ecx |
| ; X86-NEXT: movl 12(%ebp), %eax |
| ; X86-NEXT: movl 16(%ebp), %edx |
| ; X86-NEXT: movl 20(%ebp), %esi |
| ; X86-NEXT: movl 40(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 36(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 32(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 28(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 24(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: movl 40(%esp,%eax,4), %edx |
| ; X86-NEXT: movl 36(%esp,%eax,4), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 44(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 48(%esp,%eax,4), %ebx |
| ; X86-NEXT: shrdl %cl, %ebx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 52(%esp,%eax,4), %edi |
| ; X86-NEXT: shrdl %cl, %edi, %ebx |
| ; X86-NEXT: movl 56(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edi |
| ; X86-NEXT: movl 32(%esp,%eax,4), %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 60(%esp,%eax,4), %edx |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shrl %cl, %edx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %esi, 24(%eax) |
| ; X86-NEXT: movl %edi, 20(%eax) |
| ; X86-NEXT: movl %ebx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 4(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %r = lshr i256 %a0, %a1 |
| ret i256 %r |
| } |
| |
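; Variable-count ashr: the padding spilled above the value is the sign of the
; top limb (sarq $63, or sarl $31 on i686), and the top limb is finished with
; SAR (SARX when BMI2 is available).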
| define i256 @ashr_i256(i256 %a0, i256 %a1) nounwind { |
| ; SSE-LABEL: ashr_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: sarq $63, %r8 |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %r9d, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %eax |
| ; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: movl %r9d, %ecx |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; SSE-NEXT: shrdq %cl, %r10, %rdx |
| ; SSE-NEXT: movq -72(%rsp,%rax,8), %r11 |
| ; SSE-NEXT: shrdq %cl, %rsi, %r11 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: sarq %cl, %r10 |
| ; SSE-NEXT: movq %r10, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %r11, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_i256: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: sarq $63, %r8 |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %r9d, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %eax |
| ; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX2-NEXT: movq %rsi, %r8 |
| ; AVX2-NEXT: movl %r9d, %ecx |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX2-NEXT: movq -48(%rsp,%rax,8), %r11 |
| ; AVX2-NEXT: shrdq %cl, %r11, %rdx |
| ; AVX2-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: sarxq %r9, %r11, %rcx |
| ; AVX2-NEXT: movq %rcx, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r10, (%rdi) |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ashr_i256: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: sarq $63, %r8 |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %r9d, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %eax |
| ; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512F-NEXT: movq %rsi, %r8 |
| ; AVX512F-NEXT: movl %r9d, %ecx |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r11 |
| ; AVX512F-NEXT: shrdq %cl, %r11, %rdx |
| ; AVX512F-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: sarxq %r9, %r11, %rcx |
| ; AVX512F-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512F-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512F-NEXT: movq %r8, 8(%rdi) |
| ; AVX512F-NEXT: movq %r10, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ashr_i256: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: sarq $63, %r8 |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %r9d, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %eax |
| ; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: movl %r9d, %ecx |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512VL-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r11 |
| ; AVX512VL-NEXT: shrdq %cl, %rsi, %r11 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: sarxq %r9, %r10, %rcx |
| ; AVX512VL-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VL-NEXT: movq %r11, (%rdi) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: ashr_i256: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: sarq $63, %r8 |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %r9d, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %eax |
| ; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: movl %r9d, %ecx |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512VBMI-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r11 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r11 |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: sarxq %r9, %r10, %rcx |
| ; AVX512VBMI-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %r11, (%rdi) |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 44(%ebp), %ecx |
| ; X86-NEXT: movl 12(%ebp), %eax |
| ; X86-NEXT: movl 16(%ebp), %edx |
| ; X86-NEXT: movl 20(%ebp), %esi |
| ; X86-NEXT: movl 36(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 32(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 28(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 24(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 40(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: sarl $31, %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: movl 40(%esp,%eax,4), %edx |
| ; X86-NEXT: movl 36(%esp,%eax,4), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 44(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 48(%esp,%eax,4), %ebx |
| ; X86-NEXT: shrdl %cl, %ebx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 52(%esp,%eax,4), %edi |
| ; X86-NEXT: shrdl %cl, %edi, %ebx |
| ; X86-NEXT: movl 56(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edi |
| ; X86-NEXT: movl 32(%esp,%eax,4), %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 60(%esp,%eax,4), %edx |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: sarl %cl, %edx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %esi, 24(%eax) |
| ; X86-NEXT: movl %edi, 20(%eax) |
| ; X86-NEXT: movl %ebx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 4(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %r = ashr i256 %a0, %a1 |
| ret i256 %r |
| } |
| |
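; As shl_i256, but the shifted value is loaded from memory, so the stack spill
; goes through vector registers and the count arrives in %rdx.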
| define i256 @shl_i256_load(ptr %p0, i256 %a1) nounwind { |
| ; SSE-LABEL: shl_i256_load: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rdx, %rcx |
| ; SSE-NEXT: movaps (%rsi), %xmm0 |
| ; SSE-NEXT: movaps 16(%rsi), %xmm1 |
| ; SSE-NEXT: xorps %xmm2, %xmm2 |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $3, %al |
| ; SSE-NEXT: andb $24, %al |
| ; SSE-NEXT: negb %al |
| ; SSE-NEXT: movsbq %al, %rax |
| ; SSE-NEXT: movq -32(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -24(%rsp,%rax), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: shldq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -16(%rsp,%rax), %r9 |
| ; SSE-NEXT: shldq %cl, %rsi, %r9 |
| ; SSE-NEXT: movq -40(%rsp,%rax), %rax |
| ; SSE-NEXT: movq %rax, %rsi |
| ; SSE-NEXT: shlq %cl, %rsi |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shldq %cl, %rax, %rdx |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %r8, 16(%rdi) |
| ; SSE-NEXT: movq %rdx, 8(%rdi) |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_i256_load: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdx, %rcx |
| ; AVX2-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $3, %al |
| ; AVX2-NEXT: andb $24, %al |
| ; AVX2-NEXT: negb %al |
| ; AVX2-NEXT: movsbq %al, %rdx |
| ; AVX2-NEXT: movq -32(%rsp,%rdx), %rsi |
| ; AVX2-NEXT: movq -24(%rsp,%rdx), %rax |
| ; AVX2-NEXT: movq %rax, %r8 |
| ; AVX2-NEXT: shldq %cl, %rsi, %r8 |
| ; AVX2-NEXT: movq -16(%rsp,%rdx), %r9 |
| ; AVX2-NEXT: shldq %cl, %rax, %r9 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: movq -40(%rsp,%rdx), %rdx |
| ; AVX2-NEXT: shlxq %rcx, %rdx, %rdi |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq %r9, 24(%rax) |
| ; AVX2-NEXT: movq %r8, 16(%rax) |
| ; AVX2-NEXT: movq %rsi, 8(%rax) |
| ; AVX2-NEXT: movq %rdi, (%rax) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: shl_i256_load: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rdx, %rcx |
| ; AVX512F-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512F-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $3, %al |
| ; AVX512F-NEXT: andb $24, %al |
| ; AVX512F-NEXT: negb %al |
| ; AVX512F-NEXT: movsbq %al, %rdx |
| ; AVX512F-NEXT: movq -32(%rsp,%rdx), %rsi |
| ; AVX512F-NEXT: movq -24(%rsp,%rdx), %rax |
| ; AVX512F-NEXT: movq %rax, %r8 |
| ; AVX512F-NEXT: shldq %cl, %rsi, %r8 |
| ; AVX512F-NEXT: movq -16(%rsp,%rdx), %r9 |
| ; AVX512F-NEXT: shldq %cl, %rax, %r9 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movq -40(%rsp,%rdx), %rdx |
| ; AVX512F-NEXT: shlxq %rcx, %rdx, %rdi |
| ; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512F-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX512F-NEXT: movq %r9, 24(%rax) |
| ; AVX512F-NEXT: movq %r8, 16(%rax) |
| ; AVX512F-NEXT: movq %rsi, 8(%rax) |
| ; AVX512F-NEXT: movq %rdi, (%rax) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: shl_i256_load: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdx, %rcx |
| ; AVX512VL-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $3, %al |
| ; AVX512VL-NEXT: andb $24, %al |
| ; AVX512VL-NEXT: negb %al |
| ; AVX512VL-NEXT: movsbq %al, %rax |
| ; AVX512VL-NEXT: movq -32(%rsp,%rax), %rdx |
| ; AVX512VL-NEXT: movq -24(%rsp,%rax), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: shldq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -16(%rsp,%rax), %r9 |
| ; AVX512VL-NEXT: shldq %cl, %rsi, %r9 |
| ; AVX512VL-NEXT: movq -40(%rsp,%rax), %rsi |
| ; AVX512VL-NEXT: shldq %cl, %rsi, %rdx |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: shlxq %rcx, %rsi, %rcx |
| ; AVX512VL-NEXT: movq %r9, 24(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 16(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 8(%rdi) |
| ; AVX512VL-NEXT: movq %rcx, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: shl_i256_load: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdx, %rcx |
| ; AVX512VBMI-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX512VBMI-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $3, %al |
| ; AVX512VBMI-NEXT: andb $24, %al |
| ; AVX512VBMI-NEXT: negb %al |
| ; AVX512VBMI-NEXT: movsbq %al, %rax |
| ; AVX512VBMI-NEXT: movq -32(%rsp,%rax), %rdx |
| ; AVX512VBMI-NEXT: movq -24(%rsp,%rax), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: shldq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -16(%rsp,%rax), %r9 |
| ; AVX512VBMI-NEXT: shldq %cl, %rsi, %r9 |
| ; AVX512VBMI-NEXT: movq -40(%rsp,%rax), %rsi |
| ; AVX512VBMI-NEXT: shldq %cl, %rsi, %rdx |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: shlxq %rcx, %rsi, %rcx |
| ; AVX512VBMI-NEXT: movq %r9, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %rcx, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: shl_i256_load: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl (%ecx), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 4(%ecx), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 8(%ecx), %esi |
| ; X86-NEXT: movl 12(%ecx), %edi |
| ; X86-NEXT: movl 16(%ecx), %ebx |
| ; X86-NEXT: movl 20(%ecx), %edx |
| ; X86-NEXT: movl 24(%ecx), %eax |
| ; X86-NEXT: movl 28(%ecx), %ecx |
| ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 16(%ebp), %ecx |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $3, %al |
| ; X86-NEXT: andb $28, %al |
| ; X86-NEXT: negb %al |
| ; X86-NEXT: movsbl %al, %eax |
| ; X86-NEXT: movl 68(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 72(%esp,%eax), %edx |
| ; X86-NEXT: movl %edx, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 76(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: shldl %cl, %edx, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 80(%esp,%eax), %edx |
| ; X86-NEXT: movl %edx, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 84(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, %ebx |
| ; X86-NEXT: shldl %cl, %edx, %ebx |
| ; X86-NEXT: movl 88(%esp,%eax), %edi |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shldl %cl, %esi, %edx |
| ; X86-NEXT: movl 64(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 92(%esp,%eax), %esi |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %esi, 28(%eax) |
| ; X86-NEXT: movl %edx, 24(%eax) |
| ; X86-NEXT: movl %ebx, 20(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shll %cl, %edx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: movl %esi, 4(%eax) |
| ; X86-NEXT: movl %edx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %a0 = load i256, ptr %p0 |
| %r = shl i256 %a0, %a1 |
| ret i256 %r |
| } |
| |
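; As lshr_i256, but the shifted value is loaded from memory.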
| define i256 @lshr_i256_load(ptr %p0, i256 %a1) nounwind { |
| ; SSE-LABEL: lshr_i256_load: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rdx, %rcx |
| ; SSE-NEXT: movaps (%rsi), %xmm0 |
| ; SSE-NEXT: movaps 16(%rsi), %xmm1 |
| ; SSE-NEXT: xorps %xmm2, %xmm2 |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %eax |
| ; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; SSE-NEXT: shrdq %cl, %r9, %rdx |
| ; SSE-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; SSE-NEXT: shrdq %cl, %rsi, %r10 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shrq %cl, %r9 |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %r10, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_i256_load: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdx, %rcx |
| ; AVX2-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %eax |
| ; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX2-NEXT: movq %rsi, %r8 |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX2-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX2-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrxq %rcx, %r10, %rcx |
| ; AVX2-NEXT: movq %rcx, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r9, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_i256_load: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rdx, %rcx |
| ; AVX512F-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512F-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %eax |
| ; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512F-NEXT: movq %rsi, %r8 |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512F-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512F-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: shrxq %rcx, %r10, %rcx |
| ; AVX512F-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512F-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512F-NEXT: movq %r8, 8(%rdi) |
| ; AVX512F-NEXT: movq %r9, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_i256_load: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdx, %rcx |
| ; AVX512VL-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %eax |
| ; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VL-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VL-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: shrxq %rcx, %r9, %rcx |
| ; AVX512VL-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VL-NEXT: movq %r10, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_i256_load: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdx, %rcx |
| ; AVX512VBMI-NEXT: vmovups (%rsi), %ymm0 |
| ; AVX512VBMI-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %eax |
| ; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VBMI-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: shrxq %rcx, %r9, %rcx |
| ; AVX512VBMI-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %r10, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_i256_load: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl (%ecx), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 4(%ecx), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 8(%ecx), %esi |
| ; X86-NEXT: movl 12(%ecx), %edi |
| ; X86-NEXT: movl 16(%ecx), %ebx |
| ; X86-NEXT: movl 20(%ecx), %edx |
| ; X86-NEXT: movl 24(%ecx), %eax |
| ; X86-NEXT: movl 28(%ecx), %ecx |
| ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 16(%ebp), %ecx |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: movl 40(%esp,%eax,4), %edx |
| ; X86-NEXT: movl 36(%esp,%eax,4), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 44(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 48(%esp,%eax,4), %ebx |
| ; X86-NEXT: shrdl %cl, %ebx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 52(%esp,%eax,4), %edi |
| ; X86-NEXT: shrdl %cl, %edi, %ebx |
| ; X86-NEXT: movl 56(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edi |
| ; X86-NEXT: movl 32(%esp,%eax,4), %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 60(%esp,%eax,4), %edx |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shrl %cl, %edx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %esi, 24(%eax) |
| ; X86-NEXT: movl %edi, 20(%eax) |
| ; X86-NEXT: movl %ebx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 4(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %a0 = load i256, ptr %p0 |
| %r = lshr i256 %a0, %a1 |
| ret i256 %r |
| } |
| |
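; As ashr_i256, but the value is loaded from memory; the top limb is reloaded
; as a scalar so the sign padding can be produced with sarq $63.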
| define i256 @ashr_i256_load(ptr %p0, i256 %a1) nounwind { |
| ; SSE-LABEL: ashr_i256_load: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rdx, %rcx |
| ; SSE-NEXT: movaps (%rsi), %xmm0 |
| ; SSE-NEXT: movq 16(%rsi), %rax |
| ; SSE-NEXT: movq 24(%rsi), %rdx |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: sarq $63, %rdx |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %eax |
| ; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; SSE-NEXT: shrdq %cl, %r9, %rdx |
| ; SSE-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; SSE-NEXT: shrdq %cl, %rsi, %r10 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: sarq %cl, %r9 |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %r10, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_i256_load: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdx, %rcx |
| ; AVX2-NEXT: vmovaps (%rsi), %xmm0 |
| ; AVX2-NEXT: movq 16(%rsi), %rax |
| ; AVX2-NEXT: movq 24(%rsi), %rdx |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: sarq $63, %rdx |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %eax |
| ; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX2-NEXT: movq %rsi, %r8 |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX2-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX2-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: sarxq %rcx, %r10, %rcx |
| ; AVX2-NEXT: movq %rcx, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r9, (%rdi) |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ashr_i256_load: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rdx, %rcx |
| ; AVX512F-NEXT: vmovaps (%rsi), %xmm0 |
| ; AVX512F-NEXT: movq 16(%rsi), %rax |
| ; AVX512F-NEXT: movq 24(%rsi), %rdx |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: sarq $63, %rdx |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %eax |
| ; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512F-NEXT: movq %rsi, %r8 |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512F-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512F-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: sarxq %rcx, %r10, %rcx |
| ; AVX512F-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512F-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512F-NEXT: movq %r8, 8(%rdi) |
| ; AVX512F-NEXT: movq %r9, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ashr_i256_load: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rdx, %rcx |
| ; AVX512VL-NEXT: vmovaps (%rsi), %xmm0 |
| ; AVX512VL-NEXT: movq 16(%rsi), %rax |
| ; AVX512VL-NEXT: movq 24(%rsi), %rdx |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: sarq $63, %rdx |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %eax |
| ; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VL-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VL-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: sarxq %rcx, %r9, %rcx |
| ; AVX512VL-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VL-NEXT: movq %r10, (%rdi) |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: ashr_i256_load: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rdx, %rcx |
| ; AVX512VBMI-NEXT: vmovaps (%rsi), %xmm0 |
| ; AVX512VBMI-NEXT: movq 16(%rsi), %rax |
| ; AVX512VBMI-NEXT: movq 24(%rsi), %rdx |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: sarq $63, %rdx |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %eax |
| ; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VBMI-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: sarxq %rcx, %r9, %rcx |
| ; AVX512VBMI-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %r10, (%rdi) |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_i256_load: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 12(%ebp), %eax |
| ; X86-NEXT: movl (%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 4(%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 8(%eax), %edi |
| ; X86-NEXT: movl 12(%eax), %esi |
| ; X86-NEXT: movl 16(%eax), %ebx |
| ; X86-NEXT: movl 20(%eax), %edx |
| ; X86-NEXT: movl 24(%eax), %ecx |
| ; X86-NEXT: movl 28(%eax), %eax |
| ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 16(%ebp), %ecx |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: sarl $31, %eax |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: movl 40(%esp,%eax,4), %edx |
| ; X86-NEXT: movl 36(%esp,%eax,4), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 44(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 48(%esp,%eax,4), %ebx |
| ; X86-NEXT: shrdl %cl, %ebx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 52(%esp,%eax,4), %edi |
| ; X86-NEXT: shrdl %cl, %edi, %ebx |
| ; X86-NEXT: movl 56(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edi |
| ; X86-NEXT: movl 32(%esp,%eax,4), %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 60(%esp,%eax,4), %edx |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: sarl %cl, %edx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %esi, 24(%eax) |
| ; X86-NEXT: movl %edi, 20(%eax) |
| ; X86-NEXT: movl %ebx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 4(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %a0 = load i256, ptr %p0 |
| %r = ashr i256 %a0, %a1 |
| ret i256 %r |
| } |
| |
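| ; Shift left by a constant amount of 1. |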
| define i256 @shl_i256_1(i256 %a0) nounwind { |
| ; CHECK-LABEL: shl_i256_1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rdi, %rax |
| ; CHECK-NEXT: shldq $1, %rcx, %r8 |
| ; CHECK-NEXT: shldq $1, %rdx, %rcx |
| ; CHECK-NEXT: shldq $1, %rsi, %rdx |
| ; CHECK-NEXT: addq %rsi, %rsi |
| ; CHECK-NEXT: movq %r8, 24(%rdi) |
| ; CHECK-NEXT: movq %rcx, 16(%rdi) |
| ; CHECK-NEXT: movq %rdx, 8(%rdi) |
| ; CHECK-NEXT: movq %rsi, (%rdi) |
| ; CHECK-NEXT: retq |
| ; |
| ; X86-LABEL: shl_i256_1: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: shldl $1, %ecx, %edx |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: shldl $1, %edx, %ecx |
| ; X86-NEXT: movl %ecx, 24(%eax) |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: shldl $1, %ecx, %edx |
| ; X86-NEXT: movl %edx, 20(%eax) |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: shldl $1, %edx, %ecx |
| ; X86-NEXT: movl %ecx, 16(%eax) |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: shldl $1, %ecx, %edx |
| ; X86-NEXT: movl %edx, 12(%eax) |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: shldl $1, %edx, %ecx |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: shldl $1, %ecx, %edx |
| ; X86-NEXT: movl %edx, 4(%eax) |
| ; X86-NEXT: addl %ecx, %ecx |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: retl $4 |
| %r = shl i256 %a0, 1 |
| ret i256 %r |
| } |
| |
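| ; Logical shift right by a constant amount of 1. |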
| define i256 @lshr_i256_1(i256 %a0) nounwind { |
| ; CHECK-LABEL: lshr_i256_1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rdi, %rax |
| ; CHECK-NEXT: shrdq $1, %rdx, %rsi |
| ; CHECK-NEXT: shrdq $1, %rcx, %rdx |
| ; CHECK-NEXT: shrdq $1, %r8, %rcx |
| ; CHECK-NEXT: shrq %r8 |
| ; CHECK-NEXT: movq %r8, 24(%rdi) |
| ; CHECK-NEXT: movq %rcx, 16(%rdi) |
| ; CHECK-NEXT: movq %rdx, 8(%rdi) |
| ; CHECK-NEXT: movq %rsi, (%rdi) |
| ; CHECK-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_i256_1: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: subl $8, %esp |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X86-NEXT: movl %edi, %esi |
| ; X86-NEXT: shldl $31, %eax, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl $1, %eax, %ecx |
| ; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill |
| ; X86-NEXT: movl %edx, %ebp |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: shldl $31, %eax, %ebp |
| ; X86-NEXT: shrdl $1, %eax, %edi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %ebx, %esi |
| ; X86-NEXT: shldl $31, %eax, %esi |
| ; X86-NEXT: shrdl $1, %eax, %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: shrdl $1, %eax, %ebx |
| ; X86-NEXT: shrl %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %eax, 28(%ecx) |
| ; X86-NEXT: movl %ebx, 24(%ecx) |
| ; X86-NEXT: movl %esi, 20(%ecx) |
| ; X86-NEXT: movl %edx, 16(%ecx) |
| ; X86-NEXT: movl %ebp, 12(%ecx) |
| ; X86-NEXT: movl %edi, 8(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 4(%ecx) |
| ; X86-NEXT: movl (%esp), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, (%ecx) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: addl $8, %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %r = lshr i256 %a0, 1 |
| ret i256 %r |
| } |
| |
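| ; Arithmetic shift right by a constant amount of 1. |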
| define i256 @ashr_i256_1(i256 %a0) nounwind { |
| ; CHECK-LABEL: ashr_i256_1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rdi, %rax |
| ; CHECK-NEXT: shrdq $1, %rdx, %rsi |
| ; CHECK-NEXT: shrdq $1, %rcx, %rdx |
| ; CHECK-NEXT: shrdq $1, %r8, %rcx |
| ; CHECK-NEXT: sarq %r8 |
| ; CHECK-NEXT: movq %r8, 24(%rdi) |
| ; CHECK-NEXT: movq %rcx, 16(%rdi) |
| ; CHECK-NEXT: movq %rdx, 8(%rdi) |
| ; CHECK-NEXT: movq %rsi, (%rdi) |
| ; CHECK-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_i256_1: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: subl $8, %esp |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X86-NEXT: movl %edi, %esi |
| ; X86-NEXT: shldl $31, %eax, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl $1, %eax, %ecx |
| ; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill |
| ; X86-NEXT: movl %edx, %ebp |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: shldl $31, %eax, %ebp |
| ; X86-NEXT: shrdl $1, %eax, %edi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl %ebx, %esi |
| ; X86-NEXT: shldl $31, %eax, %esi |
| ; X86-NEXT: shrdl $1, %eax, %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: shrdl $1, %eax, %ebx |
| ; X86-NEXT: sarl %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl %eax, 28(%ecx) |
| ; X86-NEXT: movl %ebx, 24(%ecx) |
| ; X86-NEXT: movl %esi, 20(%ecx) |
| ; X86-NEXT: movl %edx, 16(%ecx) |
| ; X86-NEXT: movl %ebp, 12(%ecx) |
| ; X86-NEXT: movl %edi, 8(%ecx) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, 4(%ecx) |
| ; X86-NEXT: movl (%esp), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, (%ecx) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: addl $8, %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %r = ashr i256 %a0, 1 |
| ret i256 %r |
| } |
| |
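| ; Shift left by a constant 200: the three low limbs become zero. |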
| define i256 @shl_i256_200(i256 %a0) nounwind { |
| ; SSE-LABEL: shl_i256_200: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: shlq $8, %rsi |
| ; SSE-NEXT: movq %rsi, 24(%rdi) |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, (%rdi) |
| ; SSE-NEXT: movq $0, 16(%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_i256_200: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shlq $8, %rsi |
| ; AVX2-NEXT: movq %rsi, 24(%rdi) |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX2-NEXT: movq $0, 16(%rdi) |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: shl_i256_200: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movq %rdi, %rax |
| ; AVX512-NEXT: shlq $8, %rsi |
| ; AVX512-NEXT: movq %rsi, 24(%rdi) |
| ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX512-NEXT: movq $0, 16(%rdi) |
| ; AVX512-NEXT: retq |
| ; |
| ; X86-LABEL: shl_i256_200: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: shll $8, %ecx |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %ecx, 24(%eax) |
| ; X86-NEXT: movl $0, 20(%eax) |
| ; X86-NEXT: movl $0, 16(%eax) |
| ; X86-NEXT: movl $0, 12(%eax) |
| ; X86-NEXT: movl $0, 8(%eax) |
| ; X86-NEXT: movl $0, 4(%eax) |
| ; X86-NEXT: movl $0, (%eax) |
| ; X86-NEXT: retl $4 |
| %r = shl i256 %a0, 200 |
| ret i256 %r |
| } |
| |
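| ; Logical shift right by a constant 200: the three high limbs become zero. |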
| define i256 @lshr_i256_200(i256 %a0) nounwind { |
| ; SSE-LABEL: lshr_i256_200: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: shrq $8, %r8 |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movups %xmm0, 8(%rdi) |
| ; SSE-NEXT: movq %r8, (%rdi) |
| ; SSE-NEXT: movq $0, 24(%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_i256_200: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrq $8, %r8 |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %xmm0, 8(%rdi) |
| ; AVX2-NEXT: movq %r8, (%rdi) |
| ; AVX2-NEXT: movq $0, 24(%rdi) |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: lshr_i256_200: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movq %rdi, %rax |
| ; AVX512-NEXT: shrq $8, %r8 |
| ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovups %xmm0, 8(%rdi) |
| ; AVX512-NEXT: movq %r8, (%rdi) |
| ; AVX512-NEXT: movq $0, 24(%rdi) |
| ; AVX512-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_i256_200: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: shrl $8, %edx |
| ; X86-NEXT: movl %edx, 4(%eax) |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: movl $0, 28(%eax) |
| ; X86-NEXT: movl $0, 24(%eax) |
| ; X86-NEXT: movl $0, 20(%eax) |
| ; X86-NEXT: movl $0, 16(%eax) |
| ; X86-NEXT: movl $0, 12(%eax) |
| ; X86-NEXT: movl $0, 8(%eax) |
| ; X86-NEXT: retl $4 |
| %r = lshr i256 %a0, 200 |
| ret i256 %r |
| } |
| |
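| ; Arithmetic shift right by a constant 200: the three high limbs are sign-filled. |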
| define i256 @ashr_i256_200(i256 %a0) nounwind { |
| ; CHECK-LABEL: ashr_i256_200: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rdi, %rax |
| ; CHECK-NEXT: movq %r8, %rcx |
| ; CHECK-NEXT: sarq $8, %rcx |
| ; CHECK-NEXT: sarq $63, %r8 |
| ; CHECK-NEXT: movq %r8, 24(%rdi) |
| ; CHECK-NEXT: movq %r8, 16(%rdi) |
| ; CHECK-NEXT: movq %r8, 8(%rdi) |
| ; CHECK-NEXT: movq %rcx, (%rdi) |
| ; CHECK-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_i256_200: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl %edx, %esi |
| ; X86-NEXT: sarl $8, %esi |
| ; X86-NEXT: sarl $31, %edx |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %edx, 24(%eax) |
| ; X86-NEXT: movl %edx, 20(%eax) |
| ; X86-NEXT: movl %edx, 16(%eax) |
| ; X86-NEXT: movl %edx, 12(%eax) |
| ; X86-NEXT: movl %edx, 8(%eax) |
| ; X86-NEXT: movl %esi, 4(%eax) |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl $4 |
| %r = ashr i256 %a0, 200 |
| ret i256 %r |
| } |
| |
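| ; Shift left by 255: only the original low bit survives, in the topmost bit. |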
| define i256 @shl_i256_255(i256 %a0) nounwind { |
| ; SSE-LABEL: shl_i256_255: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: shlq $63, %rsi |
| ; SSE-NEXT: movq %rsi, 24(%rdi) |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, (%rdi) |
| ; SSE-NEXT: movq $0, 16(%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_i256_255: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shlq $63, %rsi |
| ; AVX2-NEXT: movq %rsi, 24(%rdi) |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX2-NEXT: movq $0, 16(%rdi) |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: shl_i256_255: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movq %rdi, %rax |
| ; AVX512-NEXT: shlq $63, %rsi |
| ; AVX512-NEXT: movq %rsi, 24(%rdi) |
| ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovaps %xmm0, (%rdi) |
| ; AVX512-NEXT: movq $0, 16(%rdi) |
| ; AVX512-NEXT: retq |
| ; |
| ; X86-LABEL: shl_i256_255: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: shll $31, %ecx |
| ; X86-NEXT: movl %ecx, 28(%eax) |
| ; X86-NEXT: movl $0, 24(%eax) |
| ; X86-NEXT: movl $0, 20(%eax) |
| ; X86-NEXT: movl $0, 16(%eax) |
| ; X86-NEXT: movl $0, 12(%eax) |
| ; X86-NEXT: movl $0, 8(%eax) |
| ; X86-NEXT: movl $0, 4(%eax) |
| ; X86-NEXT: movl $0, (%eax) |
| ; X86-NEXT: retl $4 |
| %r = shl i256 %a0, 255 |
| ret i256 %r |
| } |
| |
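| ; Logical shift right by 255: only the original sign bit survives, in bit 0. |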
| define i256 @lshr_i256_255(i256 %a0) nounwind { |
| ; SSE-LABEL: lshr_i256_255: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: shrq $63, %r8 |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movups %xmm0, 8(%rdi) |
| ; SSE-NEXT: movq %r8, (%rdi) |
| ; SSE-NEXT: movq $0, 24(%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_i256_255: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrq $63, %r8 |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %xmm0, 8(%rdi) |
| ; AVX2-NEXT: movq %r8, (%rdi) |
| ; AVX2-NEXT: movq $0, 24(%rdi) |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: lshr_i256_255: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movq %rdi, %rax |
| ; AVX512-NEXT: shrq $63, %r8 |
| ; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512-NEXT: vmovups %xmm0, 8(%rdi) |
| ; AVX512-NEXT: movq %r8, (%rdi) |
| ; AVX512-NEXT: movq $0, 24(%rdi) |
| ; AVX512-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_i256_255: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: shrl $31, %ecx |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: movl $0, 28(%eax) |
| ; X86-NEXT: movl $0, 24(%eax) |
| ; X86-NEXT: movl $0, 20(%eax) |
| ; X86-NEXT: movl $0, 16(%eax) |
| ; X86-NEXT: movl $0, 12(%eax) |
| ; X86-NEXT: movl $0, 8(%eax) |
| ; X86-NEXT: movl $0, 4(%eax) |
| ; X86-NEXT: retl $4 |
| %r = lshr i256 %a0, 255 |
| ret i256 %r |
| } |
| |
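| ; Arithmetic shift right by 255: the sign bit is splatted across all limbs. |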
| define i256 @ashr_i256_255(i256 %a0) nounwind { |
| ; CHECK-LABEL: ashr_i256_255: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rdi, %rax |
| ; CHECK-NEXT: sarq $63, %r8 |
| ; CHECK-NEXT: movq %r8, 24(%rdi) |
| ; CHECK-NEXT: movq %r8, 16(%rdi) |
| ; CHECK-NEXT: movq %r8, 8(%rdi) |
| ; CHECK-NEXT: movq %r8, (%rdi) |
| ; CHECK-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_i256_255: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: sarl $31, %ecx |
| ; X86-NEXT: movl %ecx, 28(%eax) |
| ; X86-NEXT: movl %ecx, 24(%eax) |
| ; X86-NEXT: movl %ecx, 20(%eax) |
| ; X86-NEXT: movl %ecx, 16(%eax) |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl %ecx, 4(%eax) |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: retl $4 |
| %r = ashr i256 %a0, 255 |
| ret i256 %r |
| } |
| |
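| ; Shift the constant 1 left by a variable amount. |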
| define i256 @shl_1_i256(i256 %a0) nounwind { |
| ; SSE-LABEL: shl_1_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $3, %al |
| ; SSE-NEXT: andb $24, %al |
| ; SSE-NEXT: negb %al |
| ; SSE-NEXT: movsbq %al, %rax |
| ; SSE-NEXT: movq -32(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -24(%rsp,%rax), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: shldq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -16(%rsp,%rax), %r9 |
| ; SSE-NEXT: shldq %cl, %rsi, %r9 |
| ; SSE-NEXT: movq -40(%rsp,%rax), %rax |
| ; SSE-NEXT: movq %rax, %rsi |
| ; SSE-NEXT: shlq %cl, %rsi |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shldq %cl, %rax, %rdx |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %r8, 16(%rdi) |
| ; SSE-NEXT: movq %rdx, 8(%rdi) |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_1_i256: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0] |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $3, %al |
| ; AVX2-NEXT: andb $24, %al |
| ; AVX2-NEXT: negb %al |
| ; AVX2-NEXT: movsbq %al, %rdx |
| ; AVX2-NEXT: movq -32(%rsp,%rdx), %rsi |
| ; AVX2-NEXT: movq -24(%rsp,%rdx), %rax |
| ; AVX2-NEXT: movq %rax, %r8 |
| ; AVX2-NEXT: shldq %cl, %rsi, %r8 |
| ; AVX2-NEXT: movq -16(%rsp,%rdx), %r9 |
| ; AVX2-NEXT: shldq %cl, %rax, %r9 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: movq -40(%rsp,%rdx), %rdx |
| ; AVX2-NEXT: shlxq %rcx, %rdx, %rdi |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq %r9, 24(%rax) |
| ; AVX2-NEXT: movq %r8, 16(%rax) |
| ; AVX2-NEXT: movq %rsi, 8(%rax) |
| ; AVX2-NEXT: movq %rdi, (%rax) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: shl_1_i256: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rsi, %rcx |
| ; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,0,1,0,0,0] |
| ; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $3, %al |
| ; AVX512F-NEXT: andb $24, %al |
| ; AVX512F-NEXT: negb %al |
| ; AVX512F-NEXT: movsbq %al, %rdx |
| ; AVX512F-NEXT: movq -32(%rsp,%rdx), %rsi |
| ; AVX512F-NEXT: movq -24(%rsp,%rdx), %rax |
| ; AVX512F-NEXT: movq %rax, %r8 |
| ; AVX512F-NEXT: shldq %cl, %rsi, %r8 |
| ; AVX512F-NEXT: movq -16(%rsp,%rdx), %r9 |
| ; AVX512F-NEXT: shldq %cl, %rax, %r9 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movq -40(%rsp,%rdx), %rdx |
| ; AVX512F-NEXT: shlxq %rcx, %rdx, %rdi |
| ; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512F-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX512F-NEXT: movq %r9, 24(%rax) |
| ; AVX512F-NEXT: movq %r8, 16(%rax) |
| ; AVX512F-NEXT: movq %rsi, 8(%rax) |
| ; AVX512F-NEXT: movq %rdi, (%rax) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: shl_1_i256: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rsi, %rcx |
| ; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0] |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $3, %al |
| ; AVX512VL-NEXT: andb $24, %al |
| ; AVX512VL-NEXT: negb %al |
| ; AVX512VL-NEXT: movsbq %al, %rax |
| ; AVX512VL-NEXT: movq -32(%rsp,%rax), %rdx |
| ; AVX512VL-NEXT: movq -24(%rsp,%rax), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: shldq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -16(%rsp,%rax), %r9 |
| ; AVX512VL-NEXT: shldq %cl, %rsi, %r9 |
| ; AVX512VL-NEXT: movq -40(%rsp,%rax), %rsi |
| ; AVX512VL-NEXT: shldq %cl, %rsi, %rdx |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: shlxq %rcx, %rsi, %rcx |
| ; AVX512VL-NEXT: movq %r9, 24(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 16(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 8(%rdi) |
| ; AVX512VL-NEXT: movq %rcx, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: shl_1_i256: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rsi, %rcx |
| ; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0] |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $3, %al |
| ; AVX512VBMI-NEXT: andb $24, %al |
| ; AVX512VBMI-NEXT: negb %al |
| ; AVX512VBMI-NEXT: movsbq %al, %rax |
| ; AVX512VBMI-NEXT: movq -32(%rsp,%rax), %rdx |
| ; AVX512VBMI-NEXT: movq -24(%rsp,%rax), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: shldq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -16(%rsp,%rax), %r9 |
| ; AVX512VBMI-NEXT: shldq %cl, %rsi, %r9 |
| ; AVX512VBMI-NEXT: movq -40(%rsp,%rax), %rsi |
| ; AVX512VBMI-NEXT: shldq %cl, %rsi, %rdx |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: shlxq %rcx, %rsi, %rcx |
| ; AVX512VBMI-NEXT: movq %r9, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %rcx, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: shl_1_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $3, %al |
| ; X86-NEXT: andb $28, %al |
| ; X86-NEXT: negb %al |
| ; X86-NEXT: movsbl %al, %eax |
| ; X86-NEXT: movl 68(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 72(%esp,%eax), %edx |
| ; X86-NEXT: movl %edx, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 76(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: shldl %cl, %edx, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 80(%esp,%eax), %edx |
| ; X86-NEXT: movl %edx, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 84(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, %ebx |
| ; X86-NEXT: shldl %cl, %edx, %ebx |
| ; X86-NEXT: movl 88(%esp,%eax), %edi |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shldl %cl, %esi, %edx |
| ; X86-NEXT: movl 64(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 92(%esp,%eax), %esi |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %esi, 28(%eax) |
| ; X86-NEXT: movl %edx, 24(%eax) |
| ; X86-NEXT: movl %ebx, 20(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shll %cl, %edx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: movl %esi, 4(%eax) |
| ; X86-NEXT: movl %edx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %r = shl i256 1, %a0 |
| ret i256 %r |
| } |
| |
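| ; Logical shift right of a constant with only bit 255 set, by a variable amount. |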
| define i256 @lshr_signbit_i256(i256 %a0) nounwind { |
| ; SSE-LABEL: lshr_signbit_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %eax |
| ; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; SSE-NEXT: shrdq %cl, %r9, %rdx |
| ; SSE-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; SSE-NEXT: shrdq %cl, %rsi, %r10 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shrq %cl, %r9 |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %r10, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_signbit_i256: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808] |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %eax |
| ; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX2-NEXT: movq %rsi, %r8 |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX2-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX2-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrxq %rcx, %r10, %rcx |
| ; AVX2-NEXT: movq %rcx, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r9, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_signbit_i256: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rsi, %rcx |
| ; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,9223372036854775808,0,0,0,0] |
| ; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %eax |
| ; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512F-NEXT: movq %rsi, %r8 |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512F-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512F-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: shrxq %rcx, %r10, %rcx |
| ; AVX512F-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512F-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512F-NEXT: movq %r8, 8(%rdi) |
| ; AVX512F-NEXT: movq %r9, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_signbit_i256: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rsi, %rcx |
| ; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808] |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %eax |
| ; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VL-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VL-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: shrxq %rcx, %r9, %rcx |
| ; AVX512VL-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VL-NEXT: movq %r10, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_signbit_i256: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rsi, %rcx |
| ; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808] |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %eax |
| ; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VBMI-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: shrxq %rcx, %r9, %rcx |
| ; AVX512VBMI-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %r10, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_signbit_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: movl 40(%esp,%eax,4), %edx |
| ; X86-NEXT: movl 36(%esp,%eax,4), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 44(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 48(%esp,%eax,4), %ebx |
| ; X86-NEXT: shrdl %cl, %ebx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 52(%esp,%eax,4), %edi |
| ; X86-NEXT: shrdl %cl, %edi, %ebx |
| ; X86-NEXT: movl 56(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edi |
| ; X86-NEXT: movl 32(%esp,%eax,4), %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 60(%esp,%eax,4), %edx |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shrl %cl, %edx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %esi, 24(%eax) |
| ; X86-NEXT: movl %edi, 20(%eax) |
| ; X86-NEXT: movl %ebx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 4(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %s = shl i256 1, 255 |
| %r = lshr i256 %s, %a0 |
| ret i256 %r |
| } |
| |
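| ; Arithmetic shift right of a constant with only bit 255 set, by a variable amount. |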
| define i256 @ashr_signbit_i256(i256 %a0) nounwind { |
| ; SSE-LABEL: ashr_signbit_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %eax |
| ; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; SSE-NEXT: shrdq %cl, %r9, %rdx |
| ; SSE-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; SSE-NEXT: shrdq %cl, %rsi, %r10 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: sarq %cl, %r9 |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %r10, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_signbit_i256: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808] |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %eax |
| ; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX2-NEXT: movq %rsi, %r8 |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX2-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX2-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: sarxq %rcx, %r10, %rcx |
| ; AVX2-NEXT: movq %rcx, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r9, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: ashr_signbit_i256: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rsi, %rcx |
| ; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,9223372036854775808,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615] |
| ; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %eax |
| ; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512F-NEXT: movq %rsi, %r8 |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512F-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512F-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: sarxq %rcx, %r10, %rcx |
| ; AVX512F-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512F-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512F-NEXT: movq %r8, 8(%rdi) |
| ; AVX512F-NEXT: movq %r9, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: ashr_signbit_i256: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rsi, %rcx |
| ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808] |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %eax |
| ; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VL-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VL-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: sarxq %rcx, %r9, %rcx |
| ; AVX512VL-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VL-NEXT: movq %r10, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: ashr_signbit_i256: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rsi, %rcx |
| ; AVX512VBMI-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX512VBMI-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808] |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %eax |
| ; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VBMI-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: sarxq %rcx, %r9, %rcx |
| ; AVX512VBMI-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %r10, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_signbit_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000 |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: movl 40(%esp,%eax,4), %edx |
| ; X86-NEXT: movl 36(%esp,%eax,4), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 44(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 48(%esp,%eax,4), %ebx |
| ; X86-NEXT: shrdl %cl, %ebx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 52(%esp,%eax,4), %edi |
| ; X86-NEXT: shrdl %cl, %edi, %ebx |
| ; X86-NEXT: movl 56(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edi |
| ; X86-NEXT: movl 32(%esp,%eax,4), %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 60(%esp,%eax,4), %edx |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: sarl %cl, %edx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %esi, 24(%eax) |
| ; X86-NEXT: movl %edi, 20(%eax) |
| ; X86-NEXT: movl %ebx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 4(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %s = shl i256 1, 255 |
| %r = ashr i256 %s, %a0 |
| ret i256 %r |
| } |
| |
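| ; Shift the all-ones constant left by a variable amount. |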
| define i256 @shl_allbits_i256(i256 %a0) nounwind { |
| ; SSE-LABEL: shl_allbits_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $3, %al |
| ; SSE-NEXT: andb $24, %al |
| ; SSE-NEXT: negb %al |
| ; SSE-NEXT: movsbq %al, %rax |
| ; SSE-NEXT: movq -32(%rsp,%rax), %rdx |
| ; SSE-NEXT: movq -24(%rsp,%rax), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: shldq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -16(%rsp,%rax), %r9 |
| ; SSE-NEXT: shldq %cl, %rsi, %r9 |
| ; SSE-NEXT: movq -40(%rsp,%rax), %rax |
| ; SSE-NEXT: movq %rax, %rsi |
| ; SSE-NEXT: shlq %cl, %rsi |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shldq %cl, %rax, %rdx |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %r8, 16(%rdi) |
| ; SSE-NEXT: movq %rdx, 8(%rdi) |
| ; SSE-NEXT: movq %rsi, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: shl_allbits_i256: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $3, %al |
| ; AVX2-NEXT: andb $24, %al |
| ; AVX2-NEXT: negb %al |
| ; AVX2-NEXT: movsbq %al, %rdx |
| ; AVX2-NEXT: movq -32(%rsp,%rdx), %rsi |
| ; AVX2-NEXT: movq -24(%rsp,%rdx), %rax |
| ; AVX2-NEXT: movq %rax, %r8 |
| ; AVX2-NEXT: shldq %cl, %rsi, %r8 |
| ; AVX2-NEXT: movq -16(%rsp,%rdx), %r9 |
| ; AVX2-NEXT: shldq %cl, %rax, %r9 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: movq -40(%rsp,%rdx), %rdx |
| ; AVX2-NEXT: shlxq %rcx, %rdx, %rdi |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX2-NEXT: movq %r9, 24(%rax) |
| ; AVX2-NEXT: movq %r8, 16(%rax) |
| ; AVX2-NEXT: movq %rsi, 8(%rax) |
| ; AVX2-NEXT: movq %rdi, (%rax) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: shl_allbits_i256: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rsi, %rcx |
| ; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,0,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615] |
| ; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $3, %al |
| ; AVX512F-NEXT: andb $24, %al |
| ; AVX512F-NEXT: negb %al |
| ; AVX512F-NEXT: movsbq %al, %rdx |
| ; AVX512F-NEXT: movq -32(%rsp,%rdx), %rsi |
| ; AVX512F-NEXT: movq -24(%rsp,%rdx), %rax |
| ; AVX512F-NEXT: movq %rax, %r8 |
| ; AVX512F-NEXT: shldq %cl, %rsi, %r8 |
| ; AVX512F-NEXT: movq -16(%rsp,%rdx), %r9 |
| ; AVX512F-NEXT: shldq %cl, %rax, %r9 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: movq -40(%rsp,%rdx), %rdx |
| ; AVX512F-NEXT: shlxq %rcx, %rdx, %rdi |
| ; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512F-NEXT: shldq %cl, %rdx, %rsi |
| ; AVX512F-NEXT: movq %r9, 24(%rax) |
| ; AVX512F-NEXT: movq %r8, 16(%rax) |
| ; AVX512F-NEXT: movq %rsi, 8(%rax) |
| ; AVX512F-NEXT: movq %rdi, (%rax) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: shl_allbits_i256: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rsi, %rcx |
| ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $3, %al |
| ; AVX512VL-NEXT: andb $24, %al |
| ; AVX512VL-NEXT: negb %al |
| ; AVX512VL-NEXT: movsbq %al, %rax |
| ; AVX512VL-NEXT: movq -32(%rsp,%rax), %rdx |
| ; AVX512VL-NEXT: movq -24(%rsp,%rax), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: shldq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -16(%rsp,%rax), %r9 |
| ; AVX512VL-NEXT: shldq %cl, %rsi, %r9 |
| ; AVX512VL-NEXT: movq -40(%rsp,%rax), %rsi |
| ; AVX512VL-NEXT: shldq %cl, %rsi, %rdx |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: shlxq %rcx, %rsi, %rcx |
| ; AVX512VL-NEXT: movq %r9, 24(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 16(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 8(%rdi) |
| ; AVX512VL-NEXT: movq %rcx, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: shl_allbits_i256: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rsi, %rcx |
| ; AVX512VBMI-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX512VBMI-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX512VBMI-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $3, %al |
| ; AVX512VBMI-NEXT: andb $24, %al |
| ; AVX512VBMI-NEXT: negb %al |
| ; AVX512VBMI-NEXT: movsbq %al, %rax |
| ; AVX512VBMI-NEXT: movq -32(%rsp,%rax), %rdx |
| ; AVX512VBMI-NEXT: movq -24(%rsp,%rax), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: shldq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -16(%rsp,%rax), %r9 |
| ; AVX512VBMI-NEXT: shldq %cl, %rsi, %r9 |
| ; AVX512VBMI-NEXT: movq -40(%rsp,%rax), %rsi |
| ; AVX512VBMI-NEXT: shldq %cl, %rsi, %rdx |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: shlxq %rcx, %rsi, %rcx |
| ; AVX512VBMI-NEXT: movq %r9, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %rcx, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: shl_allbits_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $3, %al |
| ; X86-NEXT: andb $28, %al |
| ; X86-NEXT: negb %al |
| ; X86-NEXT: movsbl %al, %eax |
| ; X86-NEXT: movl 68(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 72(%esp,%eax), %edx |
| ; X86-NEXT: movl %edx, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 76(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, %edi |
| ; X86-NEXT: shldl %cl, %edx, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 80(%esp,%eax), %edx |
| ; X86-NEXT: movl %edx, %edi |
| ; X86-NEXT: shldl %cl, %esi, %edi |
| ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 84(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, %ebx |
| ; X86-NEXT: shldl %cl, %edx, %ebx |
| ; X86-NEXT: movl 88(%esp,%eax), %edi |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shldl %cl, %esi, %edx |
| ; X86-NEXT: movl 64(%esp,%eax), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 92(%esp,%eax), %esi |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %esi, 28(%eax) |
| ; X86-NEXT: movl %edx, 24(%eax) |
| ; X86-NEXT: movl %ebx, 20(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shll %cl, %edx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X86-NEXT: shldl %cl, %edi, %esi |
| ; X86-NEXT: movl %esi, 4(%eax) |
| ; X86-NEXT: movl %edx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %r = shl i256 -1, %a0 |
| ret i256 %r |
| } |
| |
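| ; Logical shift right of the all-ones constant by a variable amount. |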
| define i256 @lshr_allbits_i256(i256 %a0) nounwind { |
| ; SSE-LABEL: lshr_allbits_i256: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %eax |
| ; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; SSE-NEXT: movq %rsi, %r8 |
| ; SSE-NEXT: shrdq %cl, %rdx, %r8 |
| ; SSE-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; SSE-NEXT: shrdq %cl, %r9, %rdx |
| ; SSE-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; SSE-NEXT: shrdq %cl, %rsi, %r10 |
| ; SSE-NEXT: movq %rdi, %rax |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shrq %cl, %r9 |
| ; SSE-NEXT: movq %r9, 24(%rdi) |
| ; SSE-NEXT: movq %rdx, 16(%rdi) |
| ; SSE-NEXT: movq %r8, 8(%rdi) |
| ; SSE-NEXT: movq %r10, (%rdi) |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_allbits_i256: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %eax |
| ; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX2-NEXT: movq %rsi, %r8 |
| ; AVX2-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX2-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX2-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX2-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX2-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX2-NEXT: movq %rdi, %rax |
| ; AVX2-NEXT: shrxq %rcx, %r10, %rcx |
| ; AVX2-NEXT: movq %rcx, 24(%rdi) |
| ; AVX2-NEXT: movq %rdx, 16(%rdi) |
| ; AVX2-NEXT: movq %r8, 8(%rdi) |
| ; AVX2-NEXT: movq %r9, (%rdi) |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_allbits_i256: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rsi, %rcx |
| ; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615,0,0,0,0] |
| ; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %eax |
| ; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512F-NEXT: movq %rsi, %r8 |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r9 |
| ; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r10 |
| ; AVX512F-NEXT: shrdq %cl, %r10, %rdx |
| ; AVX512F-NEXT: shrdq %cl, %rsi, %r9 |
| ; AVX512F-NEXT: movq %rdi, %rax |
| ; AVX512F-NEXT: shrxq %rcx, %r10, %rcx |
| ; AVX512F-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512F-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512F-NEXT: movq %r8, 8(%rdi) |
| ; AVX512F-NEXT: movq %r9, (%rdi) |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_allbits_i256: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, %rcx |
| ; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX512VL-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %eax |
| ; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VL-NEXT: movq %rsi, %r8 |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VL-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VL-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VL-NEXT: movq %rdi, %rax |
| ; AVX512VL-NEXT: shrxq %rcx, %r9, %rcx |
| ; AVX512VL-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VL-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VL-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VL-NEXT: movq %r10, (%rdi) |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_allbits_i256: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: vpxor %xmm0, %xmm0, %xmm0 |
| ; AVX512VBMI-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, %rcx |
| ; AVX512VBMI-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0 |
| ; AVX512VBMI-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %eax |
| ; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi |
| ; AVX512VBMI-NEXT: movq %rsi, %r8 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8 |
| ; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r9 |
| ; AVX512VBMI-NEXT: shrdq %cl, %r9, %rdx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r10 |
| ; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r10 |
| ; AVX512VBMI-NEXT: movq %rdi, %rax |
| ; AVX512VBMI-NEXT: shrxq %rcx, %r9, %rcx |
| ; AVX512VBMI-NEXT: movq %rcx, 24(%rdi) |
| ; AVX512VBMI-NEXT: movq %rdx, 16(%rdi) |
| ; AVX512VBMI-NEXT: movq %r8, 8(%rdi) |
| ; AVX512VBMI-NEXT: movq %r10, (%rdi) |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_allbits_i256: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $112, %esp |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $-1, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %eax |
| ; X86-NEXT: movl 40(%esp,%eax,4), %edx |
| ; X86-NEXT: movl 36(%esp,%eax,4), %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 44(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 48(%esp,%eax,4), %ebx |
| ; X86-NEXT: shrdl %cl, %ebx, %esi |
| ; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 52(%esp,%eax,4), %edi |
| ; X86-NEXT: shrdl %cl, %edi, %ebx |
| ; X86-NEXT: movl 56(%esp,%eax,4), %esi |
| ; X86-NEXT: shrdl %cl, %esi, %edi |
| ; X86-NEXT: movl 32(%esp,%eax,4), %edx |
| ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 60(%esp,%eax,4), %edx |
| ; X86-NEXT: shrdl %cl, %edx, %esi |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shrl %cl, %edx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl %edx, 28(%eax) |
| ; X86-NEXT: movl %esi, 24(%eax) |
| ; X86-NEXT: movl %edi, 20(%eax) |
| ; X86-NEXT: movl %ebx, 16(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 8(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, 4(%eax) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, (%eax) |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| %r = lshr i256 -1, %a0 |
| ret i256 %r |
| } |
| |
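| ; Extract the low 64 bits of a variable logical shift right of an i256 argument. |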
| define i64 @lshr_extract_i256_i64(i256 %a0, i256 %a1) nounwind { |
| ; SSE-LABEL: lshr_extract_i256_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: xorps %xmm0, %xmm0 |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %r8d, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %ecx |
| ; SSE-NEXT: movq -72(%rsp,%rcx,8), %rax |
| ; SSE-NEXT: movq -64(%rsp,%rcx,8), %rdx |
| ; SSE-NEXT: movl %r8d, %ecx |
| ; SSE-NEXT: shrdq %cl, %rdx, %rax |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_extract_i256_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %r8d, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %ecx |
| ; AVX2-NEXT: movq -72(%rsp,%rcx,8), %rax |
| ; AVX2-NEXT: movq -64(%rsp,%rcx,8), %rdx |
| ; AVX2-NEXT: movl %r8d, %ecx |
| ; AVX2-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_extract_i256_i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %r8d, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %ecx |
| ; AVX512F-NEXT: movq -72(%rsp,%rcx,8), %rax |
| ; AVX512F-NEXT: movq -64(%rsp,%rcx,8), %rdx |
| ; AVX512F-NEXT: movl %r8d, %ecx |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_extract_i256_i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %r8d, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %ecx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rcx,8), %rax |
| ; AVX512VL-NEXT: movq -64(%rsp,%rcx,8), %rdx |
| ; AVX512VL-NEXT: movl %r8d, %ecx |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_extract_i256_i64: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0 |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %r8d, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %ecx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rcx,8), %rax |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rcx,8), %rdx |
| ; AVX512VBMI-NEXT: movl %r8d, %ecx |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_extract_i256_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $64, %esp |
| ; X86-NEXT: movl 40(%ebp), %ecx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl 12(%ebp), %edx |
| ; X86-NEXT: movl 16(%ebp), %esi |
| ; X86-NEXT: movl 36(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 32(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 28(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 24(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 20(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, (%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %edx |
| ; X86-NEXT: movl 8(%esp,%edx,4), %esi |
| ; X86-NEXT: movl (%esp,%edx,4), %eax |
| ; X86-NEXT: movl 4(%esp,%edx,4), %edi |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shrdl %cl, %edi, %eax |
| ; X86-NEXT: leal -8(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| %b = lshr i256 %a0, %a1 |
| %r = trunc i256 %b to i64 |
| ret i64 %r |
| } |
| |
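| ; Extract the low 64 bits of a variable arithmetic shift right of an i256 argument. |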
| define i64 @ashr_extract_i256_i64(i256 %a0, i256 %a1) nounwind { |
| ; CHECK-LABEL: ashr_extract_i256_i64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: sarq $63, %rcx |
| ; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; CHECK-NEXT: movl %r8d, %eax |
| ; CHECK-NEXT: shrb $6, %al |
| ; CHECK-NEXT: movzbl %al, %ecx |
| ; CHECK-NEXT: movq -72(%rsp,%rcx,8), %rax |
| ; CHECK-NEXT: movq -64(%rsp,%rcx,8), %rdx |
| ; CHECK-NEXT: movl %r8d, %ecx |
| ; CHECK-NEXT: shrdq %cl, %rdx, %rax |
| ; CHECK-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_extract_i256_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $64, %esp |
| ; X86-NEXT: movl 40(%ebp), %ecx |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl 12(%ebp), %edx |
| ; X86-NEXT: movl 16(%ebp), %esi |
| ; X86-NEXT: movl 32(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 28(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 24(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 20(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 36(%ebp), %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, (%esp) |
| ; X86-NEXT: sarl $31, %edi |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %edx |
| ; X86-NEXT: movl 8(%esp,%edx,4), %esi |
| ; X86-NEXT: movl (%esp,%edx,4), %eax |
| ; X86-NEXT: movl 4(%esp,%edx,4), %edi |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shrdl %cl, %edi, %eax |
| ; X86-NEXT: leal -8(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| %b = ashr i256 %a0, %a1 |
| %r = trunc i256 %b to i64 |
| ret i64 %r |
| } |
| |
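| ; Extract the low 64 bits of a variable logical shift right of an i256 value loaded from memory. |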
| define i64 @lshr_extract_load_i256_i64(ptr %p0, i256 %a1) nounwind { |
| ; SSE-LABEL: lshr_extract_load_i256_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: movaps (%rdi), %xmm0 |
| ; SSE-NEXT: movaps 16(%rdi), %xmm1 |
| ; SSE-NEXT: xorps %xmm2, %xmm2 |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %edx |
| ; SSE-NEXT: movq -72(%rsp,%rdx,8), %rax |
| ; SSE-NEXT: movq -64(%rsp,%rdx,8), %rdx |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shrdq %cl, %rdx, %rax |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: lshr_extract_load_i256_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vmovups (%rdi), %ymm0 |
| ; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %edx |
| ; AVX2-NEXT: movq -72(%rsp,%rdx,8), %rax |
| ; AVX2-NEXT: movq -64(%rsp,%rdx,8), %rdx |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX2-NEXT: vzeroupper |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512F-LABEL: lshr_extract_load_i256_i64: |
| ; AVX512F: # %bb.0: |
| ; AVX512F-NEXT: movq %rsi, %rcx |
| ; AVX512F-NEXT: vmovups (%rdi), %ymm0 |
| ; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512F-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512F-NEXT: movl %ecx, %eax |
| ; AVX512F-NEXT: shrb $6, %al |
| ; AVX512F-NEXT: movzbl %al, %edx |
| ; AVX512F-NEXT: movq -72(%rsp,%rdx,8), %rax |
| ; AVX512F-NEXT: movq -64(%rsp,%rdx,8), %rdx |
| ; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512F-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512F-NEXT: retq |
| ; |
| ; AVX512VL-LABEL: lshr_extract_load_i256_i64: |
| ; AVX512VL: # %bb.0: |
| ; AVX512VL-NEXT: movq %rsi, %rcx |
| ; AVX512VL-NEXT: vmovups (%rdi), %ymm0 |
| ; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VL-NEXT: movl %ecx, %eax |
| ; AVX512VL-NEXT: shrb $6, %al |
| ; AVX512VL-NEXT: movzbl %al, %edx |
| ; AVX512VL-NEXT: movq -72(%rsp,%rdx,8), %rax |
| ; AVX512VL-NEXT: movq -64(%rsp,%rdx,8), %rdx |
| ; AVX512VL-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512VL-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VL-NEXT: vzeroupper |
| ; AVX512VL-NEXT: retq |
| ; |
| ; AVX512VBMI-LABEL: lshr_extract_load_i256_i64: |
| ; AVX512VBMI: # %bb.0: |
| ; AVX512VBMI-NEXT: movq %rsi, %rcx |
| ; AVX512VBMI-NEXT: vmovups (%rdi), %ymm0 |
| ; AVX512VBMI-NEXT: vxorps %xmm1, %xmm1, %xmm1 |
| ; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp) |
| ; AVX512VBMI-NEXT: movl %ecx, %eax |
| ; AVX512VBMI-NEXT: shrb $6, %al |
| ; AVX512VBMI-NEXT: movzbl %al, %edx |
| ; AVX512VBMI-NEXT: movq -72(%rsp,%rdx,8), %rax |
| ; AVX512VBMI-NEXT: movq -64(%rsp,%rdx,8), %rdx |
| ; AVX512VBMI-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512VBMI-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512VBMI-NEXT: vzeroupper |
| ; AVX512VBMI-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_extract_load_i256_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $96, %esp |
| ; X86-NEXT: movl 8(%ebp), %ecx |
| ; X86-NEXT: movl (%ecx), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 4(%ecx), %eax |
| ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 8(%ecx), %esi |
| ; X86-NEXT: movl 12(%ecx), %edi |
| ; X86-NEXT: movl 16(%ecx), %ebx |
| ; X86-NEXT: movl 20(%ecx), %edx |
| ; X86-NEXT: movl 24(%ecx), %eax |
| ; X86-NEXT: movl 28(%ecx), %ecx |
| ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %edx |
| ; X86-NEXT: movl 24(%esp,%edx,4), %esi |
| ; X86-NEXT: movl 16(%esp,%edx,4), %eax |
| ; X86-NEXT: movl 20(%esp,%edx,4), %edi |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shrdl %cl, %edi, %eax |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| %a0 = load i256, ptr %p0 |
| %b = lshr i256 %a0, %a1 |
| %r = trunc i256 %b to i64 |
| ret i64 %r |
| } |
| |
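| ; Extract the low 64 bits of a variable arithmetic shift right of an i256 value loaded from memory. |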
| define i64 @ashr_extract_load_i256_i64(ptr %p0, i256 %a1) nounwind { |
| ; SSE-LABEL: ashr_extract_load_i256_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movq %rsi, %rcx |
| ; SSE-NEXT: movaps (%rdi), %xmm0 |
| ; SSE-NEXT: movq 16(%rdi), %rax |
| ; SSE-NEXT: movq 24(%rdi), %rdx |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: sarq $63, %rdx |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movl %ecx, %eax |
| ; SSE-NEXT: shrb $6, %al |
| ; SSE-NEXT: movzbl %al, %edx |
| ; SSE-NEXT: movq -72(%rsp,%rdx,8), %rax |
| ; SSE-NEXT: movq -64(%rsp,%rdx,8), %rdx |
| ; SSE-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; SSE-NEXT: shrdq %cl, %rdx, %rax |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_extract_load_i256_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: movq %rsi, %rcx |
| ; AVX2-NEXT: vmovaps (%rdi), %xmm0 |
| ; AVX2-NEXT: movq 16(%rdi), %rax |
| ; AVX2-NEXT: movq 24(%rdi), %rdx |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: sarq $63, %rdx |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movl %ecx, %eax |
| ; AVX2-NEXT: shrb $6, %al |
| ; AVX2-NEXT: movzbl %al, %edx |
| ; AVX2-NEXT: movq -72(%rsp,%rdx,8), %rax |
| ; AVX2-NEXT: movq -64(%rsp,%rdx,8), %rdx |
| ; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX2-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: ashr_extract_load_i256_i64: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: movq %rsi, %rcx |
| ; AVX512-NEXT: vmovaps (%rdi), %xmm0 |
| ; AVX512-NEXT: movq 16(%rdi), %rax |
| ; AVX512-NEXT: movq 24(%rdi), %rdx |
| ; AVX512-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: sarq $63, %rdx |
| ; AVX512-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movl %ecx, %eax |
| ; AVX512-NEXT: shrb $6, %al |
| ; AVX512-NEXT: movzbl %al, %edx |
| ; AVX512-NEXT: movq -72(%rsp,%rdx,8), %rax |
| ; AVX512-NEXT: movq -64(%rsp,%rdx,8), %rdx |
| ; AVX512-NEXT: # kill: def $cl killed $cl killed $rcx |
| ; AVX512-NEXT: shrdq %cl, %rdx, %rax |
| ; AVX512-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_extract_load_i256_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $96, %esp |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl (%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 4(%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 8(%eax), %edi |
| ; X86-NEXT: movl 12(%eax), %esi |
| ; X86-NEXT: movl 16(%eax), %ebx |
| ; X86-NEXT: movl 20(%eax), %edx |
| ; X86-NEXT: movl 24(%eax), %ecx |
| ; X86-NEXT: movl 28(%eax), %eax |
| ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl 12(%ebp), %ecx |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: sarl $31, %eax |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ecx, %eax |
| ; X86-NEXT: shrb $5, %al |
| ; X86-NEXT: movzbl %al, %edx |
| ; X86-NEXT: movl 24(%esp,%edx,4), %esi |
| ; X86-NEXT: movl 16(%esp,%edx,4), %eax |
| ; X86-NEXT: movl 20(%esp,%edx,4), %edi |
| ; X86-NEXT: movl %edi, %edx |
| ; X86-NEXT: shrdl %cl, %esi, %edx |
| ; X86-NEXT: # kill: def $cl killed $cl killed $ecx |
| ; X86-NEXT: shrdl %cl, %edi, %eax |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| %a0 = load i256, ptr %p0 |
| %b = ashr i256 %a0, %a1 |
| %r = trunc i256 %b to i64 |
| ret i64 %r |
| } |
| |
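| ; A shift amount that is a multiple of 64 lets the lshr+trunc fold into an indexed i64 load. |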
| define i64 @lshr_extract_idx_load_i256_i64(ptr %p0, i256 %a1) nounwind { |
| ; CHECK-LABEL: lshr_extract_idx_load_i256_i64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andl $3, %esi |
| ; CHECK-NEXT: movq (%rdi,%rsi,8), %rax |
| ; CHECK-NEXT: retq |
| ; |
| ; X86-LABEL: lshr_extract_idx_load_i256_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: andl $3, %edx |
| ; X86-NEXT: movl (%ecx,%edx,8), %eax |
| ; X86-NEXT: movl 4(%ecx,%edx,8), %edx |
| ; X86-NEXT: retl |
| %a0 = load i256, ptr %p0 |
| %m1 = mul i256 %a1, 64 |
| %b = lshr i256 %a0, %m1 |
| %r = trunc i256 %b to i64 |
| ret i64 %r |
| } |
| |
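| ; Same multiple-of-64 shift amount with ashr; the sign-extended value still round-trips through a stack temporary. |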
| define i64 @ashr_extract_idx_load_i256_i64(ptr %p0, i256 %a1) nounwind { |
| ; SSE-LABEL: ashr_extract_idx_load_i256_i64: |
| ; SSE: # %bb.0: |
| ; SSE-NEXT: movaps (%rdi), %xmm0 |
| ; SSE-NEXT: movq 16(%rdi), %rax |
| ; SSE-NEXT: movq 24(%rdi), %rcx |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: sarq $63, %rcx |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; SSE-NEXT: andb $3, %sil |
| ; SSE-NEXT: movzbl %sil, %eax |
| ; SSE-NEXT: movq -72(%rsp,%rax,8), %rax |
| ; SSE-NEXT: retq |
| ; |
| ; AVX2-LABEL: ashr_extract_idx_load_i256_i64: |
| ; AVX2: # %bb.0: |
| ; AVX2-NEXT: vmovaps (%rdi), %xmm0 |
| ; AVX2-NEXT: movq 16(%rdi), %rax |
| ; AVX2-NEXT: movq 24(%rdi), %rcx |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: sarq $63, %rcx |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX2-NEXT: andb $3, %sil |
| ; AVX2-NEXT: movzbl %sil, %eax |
| ; AVX2-NEXT: movq -72(%rsp,%rax,8), %rax |
| ; AVX2-NEXT: retq |
| ; |
| ; AVX512-LABEL: ashr_extract_idx_load_i256_i64: |
| ; AVX512: # %bb.0: |
| ; AVX512-NEXT: vmovaps (%rdi), %xmm0 |
| ; AVX512-NEXT: movq 16(%rdi), %rax |
| ; AVX512-NEXT: movq 24(%rdi), %rcx |
| ; AVX512-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq %rax, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: sarq $63, %rcx |
| ; AVX512-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) |
| ; AVX512-NEXT: andb $3, %sil |
| ; AVX512-NEXT: movzbl %sil, %eax |
| ; AVX512-NEXT: movq -72(%rsp,%rax,8), %rax |
| ; AVX512-NEXT: retq |
| ; |
| ; X86-LABEL: ashr_extract_idx_load_i256_i64: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: movl %esp, %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: andl $-16, %esp |
| ; X86-NEXT: subl $96, %esp |
| ; X86-NEXT: movl 8(%ebp), %eax |
| ; X86-NEXT: movl (%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 4(%eax), %ecx |
| ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X86-NEXT: movl 8(%eax), %esi |
| ; X86-NEXT: movl 12(%eax), %edi |
| ; X86-NEXT: movl 16(%eax), %ebx |
| ; X86-NEXT: movl 20(%eax), %edx |
| ; X86-NEXT: movl 24(%eax), %ecx |
| ; X86-NEXT: movl 28(%eax), %eax |
| ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload |
| ; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movzbl 12(%ebp), %ecx |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: sarl $31, %eax |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) |
| ; X86-NEXT: andl $3, %ecx |
| ; X86-NEXT: movl 16(%esp,%ecx,8), %eax |
| ; X86-NEXT: movl 20(%esp,%ecx,8), %edx |
| ; X86-NEXT: leal -12(%ebp), %esp |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl |
| %a0 = load i256, ptr %p0 |
| %m1 = mul i256 %a1, 64 |
| %b = ashr i256 %a0, %m1 |
| %r = trunc i256 %b to i64 |
| ret i64 %r |
| } |