blob: 3b8e766ae1bf469f425c40b5b447f842b9432d73 [file] [log] [blame] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s -check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s -check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s -check-prefixes=CHECK,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mcpu=knl | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 -mattr=+avx512vbmi2 | FileCheck %s -check-prefixes=CHECK,AVX512,AVX512VBMI
; RUN: llc < %s -mtriple=i686-- | FileCheck %s -check-prefixes=X86
define i256 @shl_i256(i256 %a0, i256 %a1) nounwind {
; Variable-amount left shift of an i256 passed in registers.
; The autogenerated checks below show the expected lowering: the value is
; spilled to a stack slot adjacent to a block of zeros, a negated byte
; offset is derived from the shift amount (shrb $3 / andb), four limbs are
; reloaded from that offset, and the result is stitched together with
; shld/shl (shldl/shll on i686). Regenerate with update_llc_test_checks.py
; rather than hand-editing the check lines.
; SSE-LABEL: shl_i256:
; SSE: # %bb.0:
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %r9d, %eax
; SSE-NEXT: shrb $3, %al
; SSE-NEXT: andb $24, %al
; SSE-NEXT: negb %al
; SSE-NEXT: movsbq %al, %rax
; SSE-NEXT: movq -32(%rsp,%rax), %rdx
; SSE-NEXT: movq -24(%rsp,%rax), %rsi
; SSE-NEXT: movq %rsi, %r8
; SSE-NEXT: movl %r9d, %ecx
; SSE-NEXT: shldq %cl, %rdx, %r8
; SSE-NEXT: movq -16(%rsp,%rax), %r10
; SSE-NEXT: shldq %cl, %rsi, %r10
; SSE-NEXT: movq -40(%rsp,%rax), %rax
; SSE-NEXT: movq %rax, %rsi
; SSE-NEXT: shlq %cl, %rsi
; SSE-NEXT: shldq %cl, %rax, %rdx
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq %r10, 24(%rdi)
; SSE-NEXT: movq %r8, 16(%rdi)
; SSE-NEXT: movq %rdx, 8(%rdi)
; SSE-NEXT: movq %rsi, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: shl_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %r9d, %eax
; AVX2-NEXT: shrb $3, %al
; AVX2-NEXT: andb $24, %al
; AVX2-NEXT: negb %al
; AVX2-NEXT: movsbq %al, %rdx
; AVX2-NEXT: movq -32(%rsp,%rdx), %rsi
; AVX2-NEXT: movq -24(%rsp,%rdx), %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: movl %r9d, %ecx
; AVX2-NEXT: shldq %cl, %rsi, %r8
; AVX2-NEXT: movq -16(%rsp,%rdx), %r10
; AVX2-NEXT: shldq %cl, %rax, %r10
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq -40(%rsp,%rdx), %rdx
; AVX2-NEXT: shlxq %r9, %rdx, %rdi
; AVX2-NEXT: shldq %cl, %rdx, %rsi
; AVX2-NEXT: movq %r10, 24(%rax)
; AVX2-NEXT: movq %r8, 16(%rax)
; AVX2-NEXT: movq %rsi, 8(%rax)
; AVX2-NEXT: movq %rdi, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: shl_i256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %r9d, %eax
; AVX512F-NEXT: shrb $3, %al
; AVX512F-NEXT: andb $24, %al
; AVX512F-NEXT: negb %al
; AVX512F-NEXT: movsbq %al, %rdx
; AVX512F-NEXT: movq -32(%rsp,%rdx), %rsi
; AVX512F-NEXT: movq -24(%rsp,%rdx), %rax
; AVX512F-NEXT: movq %rax, %r8
; AVX512F-NEXT: movl %r9d, %ecx
; AVX512F-NEXT: shldq %cl, %rsi, %r8
; AVX512F-NEXT: movq -16(%rsp,%rdx), %r10
; AVX512F-NEXT: shldq %cl, %rax, %r10
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: movq -40(%rsp,%rdx), %rdx
; AVX512F-NEXT: shlxq %r9, %rdx, %rdi
; AVX512F-NEXT: shldq %cl, %rdx, %rsi
; AVX512F-NEXT: movq %r10, 24(%rax)
; AVX512F-NEXT: movq %r8, 16(%rax)
; AVX512F-NEXT: movq %rsi, 8(%rax)
; AVX512F-NEXT: movq %rdi, (%rax)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shl_i256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %r9d, %eax
; AVX512VL-NEXT: shrb $3, %al
; AVX512VL-NEXT: andb $24, %al
; AVX512VL-NEXT: negb %al
; AVX512VL-NEXT: movsbq %al, %rax
; AVX512VL-NEXT: movq -32(%rsp,%rax), %rdx
; AVX512VL-NEXT: movq -24(%rsp,%rax), %rsi
; AVX512VL-NEXT: movq %rsi, %r8
; AVX512VL-NEXT: movl %r9d, %ecx
; AVX512VL-NEXT: shldq %cl, %rdx, %r8
; AVX512VL-NEXT: movq -16(%rsp,%rax), %r10
; AVX512VL-NEXT: shldq %cl, %rsi, %r10
; AVX512VL-NEXT: movq -40(%rsp,%rax), %rsi
; AVX512VL-NEXT: shldq %cl, %rsi, %rdx
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: shlxq %r9, %rsi, %rcx
; AVX512VL-NEXT: movq %r10, 24(%rdi)
; AVX512VL-NEXT: movq %r8, 16(%rdi)
; AVX512VL-NEXT: movq %rdx, 8(%rdi)
; AVX512VL-NEXT: movq %rcx, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: shl_i256:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movl %r9d, %eax
; AVX512VBMI-NEXT: shrb $3, %al
; AVX512VBMI-NEXT: andb $24, %al
; AVX512VBMI-NEXT: negb %al
; AVX512VBMI-NEXT: movsbq %al, %rax
; AVX512VBMI-NEXT: movq -32(%rsp,%rax), %rdx
; AVX512VBMI-NEXT: movq -24(%rsp,%rax), %rsi
; AVX512VBMI-NEXT: movq %rsi, %r8
; AVX512VBMI-NEXT: movl %r9d, %ecx
; AVX512VBMI-NEXT: shldq %cl, %rdx, %r8
; AVX512VBMI-NEXT: movq -16(%rsp,%rax), %r10
; AVX512VBMI-NEXT: shldq %cl, %rsi, %r10
; AVX512VBMI-NEXT: movq -40(%rsp,%rax), %rsi
; AVX512VBMI-NEXT: shldq %cl, %rsi, %rdx
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: shlxq %r9, %rsi, %rcx
; AVX512VBMI-NEXT: movq %r10, 24(%rdi)
; AVX512VBMI-NEXT: movq %r8, 16(%rdi)
; AVX512VBMI-NEXT: movq %rdx, 8(%rdi)
; AVX512VBMI-NEXT: movq %rcx, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; X86-LABEL: shl_i256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 44(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 16(%ebp), %edx
; X86-NEXT: movl 20(%ebp), %esi
; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 32(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 28(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $28, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movl 68(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 72(%esp,%eax), %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 76(%esp,%eax), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: shldl %cl, %edx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 80(%esp,%eax), %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 84(%esp,%eax), %esi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: shldl %cl, %edx, %ebx
; X86-NEXT: movl 88(%esp,%eax), %edi
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: movl 64(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 92(%esp,%eax), %esi
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %esi, 28(%eax)
; X86-NEXT: movl %edx, 24(%eax)
; X86-NEXT: movl %ebx, 20(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
; The IR under test: a plain variable-amount i256 shl.
%r = shl i256 %a0, %a1
ret i256 %r
}
define i256 @lshr_i256(i256 %a0, i256 %a1) nounwind {
; Variable-amount logical right shift of an i256. The autogenerated checks
; below show the expected lowering: the value is spilled below a block of
; zeros on the stack, a limb index is derived from the shift amount
; (shrb $6 on x86-64, shrb $5 on i686), and the result limbs are combined
; with shrd/shr (shrx on BMI2 targets). Regenerate with
; update_llc_test_checks.py rather than hand-editing the check lines.
; SSE-LABEL: lshr_i256:
; SSE: # %bb.0:
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %r9d, %eax
; SSE-NEXT: shrb $6, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx
; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi
; SSE-NEXT: movq %rsi, %r8
; SSE-NEXT: movl %r9d, %ecx
; SSE-NEXT: shrdq %cl, %rdx, %r8
; SSE-NEXT: movq -48(%rsp,%rax,8), %r10
; SSE-NEXT: shrdq %cl, %r10, %rdx
; SSE-NEXT: movq -72(%rsp,%rax,8), %r11
; SSE-NEXT: shrdq %cl, %rsi, %r11
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: shrq %cl, %r10
; SSE-NEXT: movq %r10, 24(%rdi)
; SSE-NEXT: movq %rdx, 16(%rdi)
; SSE-NEXT: movq %r8, 8(%rdi)
; SSE-NEXT: movq %r11, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: lshr_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %r9d, %eax
; AVX2-NEXT: shrb $6, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX2-NEXT: movq %rsi, %r8
; AVX2-NEXT: movl %r9d, %ecx
; AVX2-NEXT: shrdq %cl, %rdx, %r8
; AVX2-NEXT: movq -72(%rsp,%rax,8), %r10
; AVX2-NEXT: movq -48(%rsp,%rax,8), %r11
; AVX2-NEXT: shrdq %cl, %r11, %rdx
; AVX2-NEXT: shrdq %cl, %rsi, %r10
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: shrxq %r9, %r11, %rcx
; AVX2-NEXT: movq %rcx, 24(%rdi)
; AVX2-NEXT: movq %rdx, 16(%rdi)
; AVX2-NEXT: movq %r8, 8(%rdi)
; AVX2-NEXT: movq %r10, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: lshr_i256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %r9d, %eax
; AVX512F-NEXT: shrb $6, %al
; AVX512F-NEXT: movzbl %al, %eax
; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512F-NEXT: movq %rsi, %r8
; AVX512F-NEXT: movl %r9d, %ecx
; AVX512F-NEXT: shrdq %cl, %rdx, %r8
; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r10
; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r11
; AVX512F-NEXT: shrdq %cl, %r11, %rdx
; AVX512F-NEXT: shrdq %cl, %rsi, %r10
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: shrxq %r9, %r11, %rcx
; AVX512F-NEXT: movq %rcx, 24(%rdi)
; AVX512F-NEXT: movq %rdx, 16(%rdi)
; AVX512F-NEXT: movq %r8, 8(%rdi)
; AVX512F-NEXT: movq %r10, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: lshr_i256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %r9d, %eax
; AVX512VL-NEXT: shrb $6, %al
; AVX512VL-NEXT: movzbl %al, %eax
; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512VL-NEXT: movq %rsi, %r8
; AVX512VL-NEXT: movl %r9d, %ecx
; AVX512VL-NEXT: shrdq %cl, %rdx, %r8
; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r10
; AVX512VL-NEXT: shrdq %cl, %r10, %rdx
; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r11
; AVX512VL-NEXT: shrdq %cl, %rsi, %r11
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: shrxq %r9, %r10, %rcx
; AVX512VL-NEXT: movq %rcx, 24(%rdi)
; AVX512VL-NEXT: movq %rdx, 16(%rdi)
; AVX512VL-NEXT: movq %r8, 8(%rdi)
; AVX512VL-NEXT: movq %r11, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: lshr_i256:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movl %r9d, %eax
; AVX512VBMI-NEXT: shrb $6, %al
; AVX512VBMI-NEXT: movzbl %al, %eax
; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512VBMI-NEXT: movq %rsi, %r8
; AVX512VBMI-NEXT: movl %r9d, %ecx
; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8
; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r10
; AVX512VBMI-NEXT: shrdq %cl, %r10, %rdx
; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r11
; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r11
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: shrxq %r9, %r10, %rcx
; AVX512VBMI-NEXT: movq %rcx, 24(%rdi)
; AVX512VBMI-NEXT: movq %rdx, 16(%rdi)
; AVX512VBMI-NEXT: movq %r8, 8(%rdi)
; AVX512VBMI-NEXT: movq %r11, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; X86-LABEL: lshr_i256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 44(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 16(%ebp), %edx
; X86-NEXT: movl 20(%ebp), %esi
; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 32(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 28(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $5, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movl 40(%esp,%eax,4), %edx
; X86-NEXT: movl 36(%esp,%eax,4), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shrdl %cl, %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 44(%esp,%eax,4), %esi
; X86-NEXT: shrdl %cl, %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 48(%esp,%eax,4), %ebx
; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 52(%esp,%eax,4), %edi
; X86-NEXT: shrdl %cl, %edi, %ebx
; X86-NEXT: movl 56(%esp,%eax,4), %esi
; X86-NEXT: shrdl %cl, %esi, %edi
; X86-NEXT: movl 32(%esp,%eax,4), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 60(%esp,%eax,4), %edx
; X86-NEXT: shrdl %cl, %edx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, 28(%eax)
; X86-NEXT: movl %esi, 24(%eax)
; X86-NEXT: movl %edi, 20(%eax)
; X86-NEXT: movl %ebx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
; The IR under test: a plain variable-amount i256 lshr.
%r = lshr i256 %a0, %a1
ret i256 %r
}
define i256 @ashr_i256(i256 %a0, i256 %a1) nounwind {
; Variable-amount arithmetic right shift of an i256. The autogenerated
; checks below show the expected lowering: like lshr_i256, but the stack
; padding above the value is filled with the sign of the top limb
; (sarq $63 on x86-64, sarl $31 on i686) instead of zeros, and the final
; top-limb shift is sar/sarx rather than shr/shrx. Regenerate with
; update_llc_test_checks.py rather than hand-editing the check lines.
; SSE-LABEL: ashr_i256:
; SSE: # %bb.0:
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; SSE-NEXT: sarq $63, %r8
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %r9d, %eax
; SSE-NEXT: shrb $6, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx
; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi
; SSE-NEXT: movq %rsi, %r8
; SSE-NEXT: movl %r9d, %ecx
; SSE-NEXT: shrdq %cl, %rdx, %r8
; SSE-NEXT: movq -48(%rsp,%rax,8), %r10
; SSE-NEXT: shrdq %cl, %r10, %rdx
; SSE-NEXT: movq -72(%rsp,%rax,8), %r11
; SSE-NEXT: shrdq %cl, %rsi, %r11
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: sarq %cl, %r10
; SSE-NEXT: movq %r10, 24(%rdi)
; SSE-NEXT: movq %rdx, 16(%rdi)
; SSE-NEXT: movq %r8, 8(%rdi)
; SSE-NEXT: movq %r11, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: ashr_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: sarq $63, %r8
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %r9d, %eax
; AVX2-NEXT: shrb $6, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX2-NEXT: movq %rsi, %r8
; AVX2-NEXT: movl %r9d, %ecx
; AVX2-NEXT: shrdq %cl, %rdx, %r8
; AVX2-NEXT: movq -72(%rsp,%rax,8), %r10
; AVX2-NEXT: movq -48(%rsp,%rax,8), %r11
; AVX2-NEXT: shrdq %cl, %r11, %rdx
; AVX2-NEXT: shrdq %cl, %rsi, %r10
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: sarxq %r9, %r11, %rcx
; AVX2-NEXT: movq %rcx, 24(%rdi)
; AVX2-NEXT: movq %rdx, 16(%rdi)
; AVX2-NEXT: movq %r8, 8(%rdi)
; AVX2-NEXT: movq %r10, (%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ashr_i256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: sarq $63, %r8
; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %r9d, %eax
; AVX512F-NEXT: shrb $6, %al
; AVX512F-NEXT: movzbl %al, %eax
; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512F-NEXT: movq %rsi, %r8
; AVX512F-NEXT: movl %r9d, %ecx
; AVX512F-NEXT: shrdq %cl, %rdx, %r8
; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r10
; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r11
; AVX512F-NEXT: shrdq %cl, %r11, %rdx
; AVX512F-NEXT: shrdq %cl, %rsi, %r10
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: sarxq %r9, %r11, %rcx
; AVX512F-NEXT: movq %rcx, 24(%rdi)
; AVX512F-NEXT: movq %rdx, 16(%rdi)
; AVX512F-NEXT: movq %r8, 8(%rdi)
; AVX512F-NEXT: movq %r10, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: ashr_i256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: sarq $63, %r8
; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %r9d, %eax
; AVX512VL-NEXT: shrb $6, %al
; AVX512VL-NEXT: movzbl %al, %eax
; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512VL-NEXT: movq %rsi, %r8
; AVX512VL-NEXT: movl %r9d, %ecx
; AVX512VL-NEXT: shrdq %cl, %rdx, %r8
; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r10
; AVX512VL-NEXT: shrdq %cl, %r10, %rdx
; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r11
; AVX512VL-NEXT: shrdq %cl, %rsi, %r11
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: sarxq %r9, %r10, %rcx
; AVX512VL-NEXT: movq %rcx, 24(%rdi)
; AVX512VL-NEXT: movq %rdx, 16(%rdi)
; AVX512VL-NEXT: movq %r8, 8(%rdi)
; AVX512VL-NEXT: movq %r11, (%rdi)
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: ashr_i256:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: sarq $63, %r8
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movl %r9d, %eax
; AVX512VBMI-NEXT: shrb $6, %al
; AVX512VBMI-NEXT: movzbl %al, %eax
; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512VBMI-NEXT: movq %rsi, %r8
; AVX512VBMI-NEXT: movl %r9d, %ecx
; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8
; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r10
; AVX512VBMI-NEXT: shrdq %cl, %r10, %rdx
; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r11
; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r11
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: sarxq %r9, %r10, %rcx
; AVX512VBMI-NEXT: movq %rcx, 24(%rdi)
; AVX512VBMI-NEXT: movq %rdx, 16(%rdi)
; AVX512VBMI-NEXT: movq %r8, 8(%rdi)
; AVX512VBMI-NEXT: movq %r11, (%rdi)
; AVX512VBMI-NEXT: retq
;
; X86-LABEL: ashr_i256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 44(%ebp), %ecx
; X86-NEXT: movl 12(%ebp), %eax
; X86-NEXT: movl 16(%ebp), %edx
; X86-NEXT: movl 20(%ebp), %esi
; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 32(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 28(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 24(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl 40(%ebp), %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: sarl $31, %edi
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $5, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movl 40(%esp,%eax,4), %edx
; X86-NEXT: movl 36(%esp,%eax,4), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shrdl %cl, %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 44(%esp,%eax,4), %esi
; X86-NEXT: shrdl %cl, %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 48(%esp,%eax,4), %ebx
; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 52(%esp,%eax,4), %edi
; X86-NEXT: shrdl %cl, %edi, %ebx
; X86-NEXT: movl 56(%esp,%eax,4), %esi
; X86-NEXT: shrdl %cl, %esi, %edi
; X86-NEXT: movl 32(%esp,%eax,4), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 60(%esp,%eax,4), %edx
; X86-NEXT: shrdl %cl, %edx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: sarl %cl, %edx
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, 28(%eax)
; X86-NEXT: movl %esi, 24(%eax)
; X86-NEXT: movl %edi, 20(%eax)
; X86-NEXT: movl %ebx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
; The IR under test: a plain variable-amount i256 ashr.
%r = ashr i256 %a0, %a1
ret i256 %r
}
define i256 @shl_i256_load(ptr %p0, i256 %a1) nounwind {
; SSE-LABEL: shl_i256_load:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdx, %rcx
; SSE-NEXT: movaps (%rsi), %xmm0
; SSE-NEXT: movaps 16(%rsi), %xmm1
; SSE-NEXT: xorps %xmm2, %xmm2
; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm2, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: shrb $3, %al
; SSE-NEXT: andb $24, %al
; SSE-NEXT: negb %al
; SSE-NEXT: movsbq %al, %rax
; SSE-NEXT: movq -32(%rsp,%rax), %rdx
; SSE-NEXT: movq -24(%rsp,%rax), %rsi
; SSE-NEXT: movq %rsi, %r8
; SSE-NEXT: shldq %cl, %rdx, %r8
; SSE-NEXT: movq -16(%rsp,%rax), %r9
; SSE-NEXT: shldq %cl, %rsi, %r9
; SSE-NEXT: movq -40(%rsp,%rax), %rax
; SSE-NEXT: movq %rax, %rsi
; SSE-NEXT: shlq %cl, %rsi
; SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; SSE-NEXT: shldq %cl, %rax, %rdx
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq %r9, 24(%rdi)
; SSE-NEXT: movq %r8, 16(%rdi)
; SSE-NEXT: movq %rdx, 8(%rdi)
; SSE-NEXT: movq %rsi, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: shl_i256_load:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: vmovups (%rsi), %ymm0
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrb $3, %al
; AVX2-NEXT: andb $24, %al
; AVX2-NEXT: negb %al
; AVX2-NEXT: movsbq %al, %rdx
; AVX2-NEXT: movq -32(%rsp,%rdx), %rsi
; AVX2-NEXT: movq -24(%rsp,%rdx), %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: shldq %cl, %rsi, %r8
; AVX2-NEXT: movq -16(%rsp,%rdx), %r9
; AVX2-NEXT: shldq %cl, %rax, %r9
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq -40(%rsp,%rdx), %rdx
; AVX2-NEXT: shlxq %rcx, %rdx, %rdi
; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; AVX2-NEXT: shldq %cl, %rdx, %rsi
; AVX2-NEXT: movq %r9, 24(%rax)
; AVX2-NEXT: movq %r8, 16(%rax)
; AVX2-NEXT: movq %rsi, 8(%rax)
; AVX2-NEXT: movq %rdi, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: shl_i256_load:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %rdx, %rcx
; AVX512F-NEXT: vmovups (%rsi), %ymm0
; AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %ecx, %eax
; AVX512F-NEXT: shrb $3, %al
; AVX512F-NEXT: andb $24, %al
; AVX512F-NEXT: negb %al
; AVX512F-NEXT: movsbq %al, %rdx
; AVX512F-NEXT: movq -32(%rsp,%rdx), %rsi
; AVX512F-NEXT: movq -24(%rsp,%rdx), %rax
; AVX512F-NEXT: movq %rax, %r8
; AVX512F-NEXT: shldq %cl, %rsi, %r8
; AVX512F-NEXT: movq -16(%rsp,%rdx), %r9
; AVX512F-NEXT: shldq %cl, %rax, %r9
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: movq -40(%rsp,%rdx), %rdx
; AVX512F-NEXT: shlxq %rcx, %rdx, %rdi
; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx
; AVX512F-NEXT: shldq %cl, %rdx, %rsi
; AVX512F-NEXT: movq %r9, 24(%rax)
; AVX512F-NEXT: movq %r8, 16(%rax)
; AVX512F-NEXT: movq %rsi, 8(%rax)
; AVX512F-NEXT: movq %rdi, (%rax)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shl_i256_load:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rdx, %rcx
; AVX512VL-NEXT: vmovups (%rsi), %ymm0
; AVX512VL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %ecx, %eax
; AVX512VL-NEXT: shrb $3, %al
; AVX512VL-NEXT: andb $24, %al
; AVX512VL-NEXT: negb %al
; AVX512VL-NEXT: movsbq %al, %rax
; AVX512VL-NEXT: movq -32(%rsp,%rax), %rdx
; AVX512VL-NEXT: movq -24(%rsp,%rax), %rsi
; AVX512VL-NEXT: movq %rsi, %r8
; AVX512VL-NEXT: shldq %cl, %rdx, %r8
; AVX512VL-NEXT: movq -16(%rsp,%rax), %r9
; AVX512VL-NEXT: shldq %cl, %rsi, %r9
; AVX512VL-NEXT: movq -40(%rsp,%rax), %rsi
; AVX512VL-NEXT: shldq %cl, %rsi, %rdx
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: shlxq %rcx, %rsi, %rcx
; AVX512VL-NEXT: movq %r9, 24(%rdi)
; AVX512VL-NEXT: movq %r8, 16(%rdi)
; AVX512VL-NEXT: movq %rdx, 8(%rdi)
; AVX512VL-NEXT: movq %rcx, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: shl_i256_load:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %rdx, %rcx
; AVX512VBMI-NEXT: vmovups (%rsi), %ymm0
; AVX512VBMI-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512VBMI-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movl %ecx, %eax
; AVX512VBMI-NEXT: shrb $3, %al
; AVX512VBMI-NEXT: andb $24, %al
; AVX512VBMI-NEXT: negb %al
; AVX512VBMI-NEXT: movsbq %al, %rax
; AVX512VBMI-NEXT: movq -32(%rsp,%rax), %rdx
; AVX512VBMI-NEXT: movq -24(%rsp,%rax), %rsi
; AVX512VBMI-NEXT: movq %rsi, %r8
; AVX512VBMI-NEXT: shldq %cl, %rdx, %r8
; AVX512VBMI-NEXT: movq -16(%rsp,%rax), %r9
; AVX512VBMI-NEXT: shldq %cl, %rsi, %r9
; AVX512VBMI-NEXT: movq -40(%rsp,%rax), %rsi
; AVX512VBMI-NEXT: shldq %cl, %rsi, %rdx
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: shlxq %rcx, %rsi, %rcx
; AVX512VBMI-NEXT: movq %r9, 24(%rdi)
; AVX512VBMI-NEXT: movq %r8, 16(%rdi)
; AVX512VBMI-NEXT: movq %rdx, 8(%rdi)
; AVX512VBMI-NEXT: movq %rcx, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; X86-LABEL: shl_i256_load:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl (%ecx), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 4(%ecx), %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 8(%ecx), %esi
; X86-NEXT: movl 12(%ecx), %edi
; X86-NEXT: movl 16(%ecx), %ebx
; X86-NEXT: movl 20(%ecx), %edx
; X86-NEXT: movl 24(%ecx), %eax
; X86-NEXT: movl 28(%ecx), %ecx
; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT: movl 16(%ebp), %ecx
; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $28, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movl 68(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 72(%esp,%eax), %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 76(%esp,%eax), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: shldl %cl, %edx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 80(%esp,%eax), %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 84(%esp,%eax), %esi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: shldl %cl, %edx, %ebx
; X86-NEXT: movl 88(%esp,%eax), %edi
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: movl 64(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 92(%esp,%eax), %esi
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %esi, 28(%eax)
; X86-NEXT: movl %edx, 24(%eax)
; X86-NEXT: movl %ebx, 20(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
%a0 = load i256, ptr %p0
%r = shl i256 %a0, %a1
ret i256 %r
}
; Variable-amount logical right shift of an i256 loaded from memory. The x86-64
; lowering spills the zero-extended value to the stack, indexes the 64-bit limb
; by (%a1 >> 6), and funnel-shifts with shrdq (shrxq on BMI2 targets); i686 does
; the same with 32-bit limbs and shrdl.
define i256 @lshr_i256_load(ptr %p0, i256 %a1) nounwind {
; SSE-LABEL: lshr_i256_load:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdx, %rcx
; SSE-NEXT:    movaps (%rsi), %xmm0
; SSE-NEXT:    movaps 16(%rsi), %xmm1
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrb $6, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movq -56(%rsp,%rax,8), %rdx
; SSE-NEXT:    movq -64(%rsp,%rax,8), %rsi
; SSE-NEXT:    movq %rsi, %r8
; SSE-NEXT:    shrdq %cl, %rdx, %r8
; SSE-NEXT:    movq -48(%rsp,%rax,8), %r9
; SSE-NEXT:    shrdq %cl, %r9, %rdx
; SSE-NEXT:    movq -72(%rsp,%rax,8), %r10
; SSE-NEXT:    shrdq %cl, %rsi, %r10
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
; SSE-NEXT:    shrq %cl, %r9
; SSE-NEXT:    movq %r9, 24(%rdi)
; SSE-NEXT:    movq %rdx, 16(%rdi)
; SSE-NEXT:    movq %r8, 8(%rdi)
; SSE-NEXT:    movq %r10, (%rdi)
; SSE-NEXT:    retq
;
; AVX2-LABEL: lshr_i256_load:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rdx, %rcx
; AVX2-NEXT:    vmovups (%rsi), %ymm0
; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrb $6, %al
; AVX2-NEXT:    movzbl %al, %eax
; AVX2-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX2-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX2-NEXT:    movq %rsi, %r8
; AVX2-NEXT:    shrdq %cl, %rdx, %r8
; AVX2-NEXT:    movq -72(%rsp,%rax,8), %r9
; AVX2-NEXT:    movq -48(%rsp,%rax,8), %r10
; AVX2-NEXT:    shrdq %cl, %r10, %rdx
; AVX2-NEXT:    shrdq %cl, %rsi, %r9
; AVX2-NEXT:    movq %rdi, %rax
; AVX2-NEXT:    shrxq %rcx, %r10, %rcx
; AVX2-NEXT:    movq %rcx, 24(%rdi)
; AVX2-NEXT:    movq %rdx, 16(%rdi)
; AVX2-NEXT:    movq %r8, 8(%rdi)
; AVX2-NEXT:    movq %r9, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: lshr_i256_load:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rdx, %rcx
; AVX512F-NEXT:    vmovups (%rsi), %ymm0
; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movl %ecx, %eax
; AVX512F-NEXT:    shrb $6, %al
; AVX512F-NEXT:    movzbl %al, %eax
; AVX512F-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512F-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512F-NEXT:    movq %rsi, %r8
; AVX512F-NEXT:    shrdq %cl, %rdx, %r8
; AVX512F-NEXT:    movq -72(%rsp,%rax,8), %r9
; AVX512F-NEXT:    movq -48(%rsp,%rax,8), %r10
; AVX512F-NEXT:    shrdq %cl, %r10, %rdx
; AVX512F-NEXT:    shrdq %cl, %rsi, %r9
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    shrxq %rcx, %r10, %rcx
; AVX512F-NEXT:    movq %rcx, 24(%rdi)
; AVX512F-NEXT:    movq %rdx, 16(%rdi)
; AVX512F-NEXT:    movq %r8, 8(%rdi)
; AVX512F-NEXT:    movq %r9, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: lshr_i256_load:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movq %rdx, %rcx
; AVX512VL-NEXT:    vmovups (%rsi), %ymm0
; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movl %ecx, %eax
; AVX512VL-NEXT:    shrb $6, %al
; AVX512VL-NEXT:    movzbl %al, %eax
; AVX512VL-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512VL-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512VL-NEXT:    movq %rsi, %r8
; AVX512VL-NEXT:    shrdq %cl, %rdx, %r8
; AVX512VL-NEXT:    movq -48(%rsp,%rax,8), %r9
; AVX512VL-NEXT:    shrdq %cl, %r9, %rdx
; AVX512VL-NEXT:    movq -72(%rsp,%rax,8), %r10
; AVX512VL-NEXT:    shrdq %cl, %rsi, %r10
; AVX512VL-NEXT:    movq %rdi, %rax
; AVX512VL-NEXT:    shrxq %rcx, %r9, %rcx
; AVX512VL-NEXT:    movq %rcx, 24(%rdi)
; AVX512VL-NEXT:    movq %rdx, 16(%rdi)
; AVX512VL-NEXT:    movq %r8, 8(%rdi)
; AVX512VL-NEXT:    movq %r10, (%rdi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512VBMI-LABEL: lshr_i256_load:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    movq %rdx, %rcx
; AVX512VBMI-NEXT:    vmovups (%rsi), %ymm0
; AVX512VBMI-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VBMI-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movl %ecx, %eax
; AVX512VBMI-NEXT:    shrb $6, %al
; AVX512VBMI-NEXT:    movzbl %al, %eax
; AVX512VBMI-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512VBMI-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512VBMI-NEXT:    movq %rsi, %r8
; AVX512VBMI-NEXT:    shrdq %cl, %rdx, %r8
; AVX512VBMI-NEXT:    movq -48(%rsp,%rax,8), %r9
; AVX512VBMI-NEXT:    shrdq %cl, %r9, %rdx
; AVX512VBMI-NEXT:    movq -72(%rsp,%rax,8), %r10
; AVX512VBMI-NEXT:    shrdq %cl, %rsi, %r10
; AVX512VBMI-NEXT:    movq %rdi, %rax
; AVX512VBMI-NEXT:    shrxq %rcx, %r9, %rcx
; AVX512VBMI-NEXT:    movq %rcx, 24(%rdi)
; AVX512VBMI-NEXT:    movq %rdx, 16(%rdi)
; AVX512VBMI-NEXT:    movq %r8, 8(%rdi)
; AVX512VBMI-NEXT:    movq %r10, (%rdi)
; AVX512VBMI-NEXT:    vzeroupper
; AVX512VBMI-NEXT:    retq
;
; X86-LABEL: lshr_i256_load:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $112, %esp
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 4(%ecx), %eax
; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 8(%ecx), %esi
; X86-NEXT:    movl 12(%ecx), %edi
; X86-NEXT:    movl 16(%ecx), %ebx
; X86-NEXT:    movl 20(%ecx), %edx
; X86-NEXT:    movl 24(%ecx), %eax
; X86-NEXT:    movl 28(%ecx), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    movl 40(%esp,%eax,4), %edx
; X86-NEXT:    movl 36(%esp,%eax,4), %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    shrdl %cl, %edx, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 44(%esp,%eax,4), %esi
; X86-NEXT:    shrdl %cl, %esi, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 48(%esp,%eax,4), %ebx
; X86-NEXT:    shrdl %cl, %ebx, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 52(%esp,%eax,4), %edi
; X86-NEXT:    shrdl %cl, %edi, %ebx
; X86-NEXT:    movl 56(%esp,%eax,4), %esi
; X86-NEXT:    shrdl %cl, %esi, %edi
; X86-NEXT:    movl 32(%esp,%eax,4), %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 60(%esp,%eax,4), %edx
; X86-NEXT:    shrdl %cl, %edx, %esi
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NEXT:    shrl %cl, %edx
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl %edx, 28(%eax)
; X86-NEXT:    movl %esi, 24(%eax)
; X86-NEXT:    movl %edi, 20(%eax)
; X86-NEXT:    movl %ebx, 16(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 8(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 4(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    leal -12(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %a0 = load i256, ptr %p0
  %r = lshr i256 %a0, %a1
  ret i256 %r
}
; Variable-amount arithmetic right shift of an i256 loaded from memory. Same
; stack-spill + limb-indexed funnel-shift strategy as lshr_i256_load, except the
; high padding limbs are filled with the sign (sarq $63 / sarl $31) and the top
; limb uses sarq/sarl (sarxq on BMI2 targets) instead of a logical shift.
define i256 @ashr_i256_load(ptr %p0, i256 %a1) nounwind {
; SSE-LABEL: ashr_i256_load:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdx, %rcx
; SSE-NEXT:    movaps (%rsi), %xmm0
; SSE-NEXT:    movq 16(%rsi), %rax
; SSE-NEXT:    movq 24(%rsi), %rdx
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    sarq $63, %rdx
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrb $6, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movq -56(%rsp,%rax,8), %rdx
; SSE-NEXT:    movq -64(%rsp,%rax,8), %rsi
; SSE-NEXT:    movq %rsi, %r8
; SSE-NEXT:    shrdq %cl, %rdx, %r8
; SSE-NEXT:    movq -48(%rsp,%rax,8), %r9
; SSE-NEXT:    shrdq %cl, %r9, %rdx
; SSE-NEXT:    movq -72(%rsp,%rax,8), %r10
; SSE-NEXT:    shrdq %cl, %rsi, %r10
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
; SSE-NEXT:    sarq %cl, %r9
; SSE-NEXT:    movq %r9, 24(%rdi)
; SSE-NEXT:    movq %rdx, 16(%rdi)
; SSE-NEXT:    movq %r8, 8(%rdi)
; SSE-NEXT:    movq %r10, (%rdi)
; SSE-NEXT:    retq
;
; AVX2-LABEL: ashr_i256_load:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rdx, %rcx
; AVX2-NEXT:    vmovaps (%rsi), %xmm0
; AVX2-NEXT:    movq 16(%rsi), %rax
; AVX2-NEXT:    movq 24(%rsi), %rdx
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    sarq $63, %rdx
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrb $6, %al
; AVX2-NEXT:    movzbl %al, %eax
; AVX2-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX2-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX2-NEXT:    movq %rsi, %r8
; AVX2-NEXT:    shrdq %cl, %rdx, %r8
; AVX2-NEXT:    movq -72(%rsp,%rax,8), %r9
; AVX2-NEXT:    movq -48(%rsp,%rax,8), %r10
; AVX2-NEXT:    shrdq %cl, %r10, %rdx
; AVX2-NEXT:    shrdq %cl, %rsi, %r9
; AVX2-NEXT:    movq %rdi, %rax
; AVX2-NEXT:    sarxq %rcx, %r10, %rcx
; AVX2-NEXT:    movq %rcx, 24(%rdi)
; AVX2-NEXT:    movq %rdx, 16(%rdi)
; AVX2-NEXT:    movq %r8, 8(%rdi)
; AVX2-NEXT:    movq %r9, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: ashr_i256_load:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rdx, %rcx
; AVX512F-NEXT:    vmovaps (%rsi), %xmm0
; AVX512F-NEXT:    movq 16(%rsi), %rax
; AVX512F-NEXT:    movq 24(%rsi), %rdx
; AVX512F-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    sarq $63, %rdx
; AVX512F-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movl %ecx, %eax
; AVX512F-NEXT:    shrb $6, %al
; AVX512F-NEXT:    movzbl %al, %eax
; AVX512F-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512F-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512F-NEXT:    movq %rsi, %r8
; AVX512F-NEXT:    shrdq %cl, %rdx, %r8
; AVX512F-NEXT:    movq -72(%rsp,%rax,8), %r9
; AVX512F-NEXT:    movq -48(%rsp,%rax,8), %r10
; AVX512F-NEXT:    shrdq %cl, %r10, %rdx
; AVX512F-NEXT:    shrdq %cl, %rsi, %r9
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    sarxq %rcx, %r10, %rcx
; AVX512F-NEXT:    movq %rcx, 24(%rdi)
; AVX512F-NEXT:    movq %rdx, 16(%rdi)
; AVX512F-NEXT:    movq %r8, 8(%rdi)
; AVX512F-NEXT:    movq %r9, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: ashr_i256_load:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movq %rdx, %rcx
; AVX512VL-NEXT:    vmovaps (%rsi), %xmm0
; AVX512VL-NEXT:    movq 16(%rsi), %rax
; AVX512VL-NEXT:    movq 24(%rsi), %rdx
; AVX512VL-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    sarq $63, %rdx
; AVX512VL-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movl %ecx, %eax
; AVX512VL-NEXT:    shrb $6, %al
; AVX512VL-NEXT:    movzbl %al, %eax
; AVX512VL-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512VL-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512VL-NEXT:    movq %rsi, %r8
; AVX512VL-NEXT:    shrdq %cl, %rdx, %r8
; AVX512VL-NEXT:    movq -48(%rsp,%rax,8), %r9
; AVX512VL-NEXT:    shrdq %cl, %r9, %rdx
; AVX512VL-NEXT:    movq -72(%rsp,%rax,8), %r10
; AVX512VL-NEXT:    shrdq %cl, %rsi, %r10
; AVX512VL-NEXT:    movq %rdi, %rax
; AVX512VL-NEXT:    sarxq %rcx, %r9, %rcx
; AVX512VL-NEXT:    movq %rcx, 24(%rdi)
; AVX512VL-NEXT:    movq %rdx, 16(%rdi)
; AVX512VL-NEXT:    movq %r8, 8(%rdi)
; AVX512VL-NEXT:    movq %r10, (%rdi)
; AVX512VL-NEXT:    retq
;
; AVX512VBMI-LABEL: ashr_i256_load:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    movq %rdx, %rcx
; AVX512VBMI-NEXT:    vmovaps (%rsi), %xmm0
; AVX512VBMI-NEXT:    movq 16(%rsi), %rax
; AVX512VBMI-NEXT:    movq 24(%rsi), %rdx
; AVX512VBMI-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    sarq $63, %rdx
; AVX512VBMI-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movl %ecx, %eax
; AVX512VBMI-NEXT:    shrb $6, %al
; AVX512VBMI-NEXT:    movzbl %al, %eax
; AVX512VBMI-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512VBMI-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512VBMI-NEXT:    movq %rsi, %r8
; AVX512VBMI-NEXT:    shrdq %cl, %rdx, %r8
; AVX512VBMI-NEXT:    movq -48(%rsp,%rax,8), %r9
; AVX512VBMI-NEXT:    shrdq %cl, %r9, %rdx
; AVX512VBMI-NEXT:    movq -72(%rsp,%rax,8), %r10
; AVX512VBMI-NEXT:    shrdq %cl, %rsi, %r10
; AVX512VBMI-NEXT:    movq %rdi, %rax
; AVX512VBMI-NEXT:    sarxq %rcx, %r9, %rcx
; AVX512VBMI-NEXT:    movq %rcx, 24(%rdi)
; AVX512VBMI-NEXT:    movq %rdx, 16(%rdi)
; AVX512VBMI-NEXT:    movq %r8, 8(%rdi)
; AVX512VBMI-NEXT:    movq %r10, (%rdi)
; AVX512VBMI-NEXT:    retq
;
; X86-LABEL: ashr_i256_load:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $112, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl (%eax), %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 4(%eax), %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 8(%eax), %edi
; X86-NEXT:    movl 12(%eax), %esi
; X86-NEXT:    movl 16(%eax), %ebx
; X86-NEXT:    movl 20(%eax), %edx
; X86-NEXT:    movl 24(%eax), %ecx
; X86-NEXT:    movl 28(%eax), %eax
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    sarl $31, %eax
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    movl 40(%esp,%eax,4), %edx
; X86-NEXT:    movl 36(%esp,%eax,4), %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    shrdl %cl, %edx, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 44(%esp,%eax,4), %esi
; X86-NEXT:    shrdl %cl, %esi, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 48(%esp,%eax,4), %ebx
; X86-NEXT:    shrdl %cl, %ebx, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 52(%esp,%eax,4), %edi
; X86-NEXT:    shrdl %cl, %edi, %ebx
; X86-NEXT:    movl 56(%esp,%eax,4), %esi
; X86-NEXT:    shrdl %cl, %esi, %edi
; X86-NEXT:    movl 32(%esp,%eax,4), %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 60(%esp,%eax,4), %edx
; X86-NEXT:    shrdl %cl, %edx, %esi
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NEXT:    sarl %cl, %edx
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl %edx, 28(%eax)
; X86-NEXT:    movl %esi, 24(%eax)
; X86-NEXT:    movl %edi, 20(%eax)
; X86-NEXT:    movl %ebx, 16(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 8(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 4(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    leal -12(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %a0 = load i256, ptr %p0
  %r = ashr i256 %a0, %a1
  ret i256 %r
}
; i256 shift left by constant 1. All x86-64 CPU levels share one lowering
; (CHECK prefix): a chain of shldq $1 funnel shifts plus addq for the low limb;
; i686 uses the analogous 32-bit shldl/addl chain.
define i256 @shl_i256_1(i256 %a0) nounwind {
; CHECK-LABEL: shl_i256_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shldq $1, %rcx, %r8
; CHECK-NEXT:    shldq $1, %rdx, %rcx
; CHECK-NEXT:    shldq $1, %rsi, %rdx
; CHECK-NEXT:    addq %rsi, %rsi
; CHECK-NEXT:    movq %r8, 24(%rdi)
; CHECK-NEXT:    movq %rcx, 16(%rdi)
; CHECK-NEXT:    movq %rdx, 8(%rdi)
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: shl_i256_1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shldl $1, %ecx, %edx
; X86-NEXT:    movl %edx, 28(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shldl $1, %edx, %ecx
; X86-NEXT:    movl %ecx, 24(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shldl $1, %ecx, %edx
; X86-NEXT:    movl %edx, 20(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shldl $1, %edx, %ecx
; X86-NEXT:    movl %ecx, 16(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shldl $1, %ecx, %edx
; X86-NEXT:    movl %edx, 12(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shldl $1, %edx, %ecx
; X86-NEXT:    movl %ecx, 8(%eax)
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shldl $1, %ecx, %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    retl $4
  %r = shl i256 %a0, 1
  ret i256 %r
}
; i256 logical right shift by constant 1. x86-64 uses a shrdq $1 chain with a
; final shrq on the top limb; i686 mixes shldl $31/shrdl $1 across the eight
; 32-bit limbs with a few spill slots.
define i256 @lshr_i256_1(i256 %a0) nounwind {
; CHECK-LABEL: lshr_i256_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shrdq $1, %rdx, %rsi
; CHECK-NEXT:    shrdq $1, %rcx, %rdx
; CHECK-NEXT:    shrdq $1, %r8, %rcx
; CHECK-NEXT:    shrq %r8
; CHECK-NEXT:    movq %r8, 24(%rdi)
; CHECK-NEXT:    movq %rcx, 16(%rdi)
; CHECK-NEXT:    movq %rdx, 8(%rdi)
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: lshr_i256_1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %edi, %esi
; X86-NEXT:    shldl $31, %eax, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    shrdl $1, %eax, %ecx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl $31, %eax, %ebp
; X86-NEXT:    shrdl $1, %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ebx, %esi
; X86-NEXT:    shldl $31, %eax, %esi
; X86-NEXT:    shrdl $1, %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrdl $1, %eax, %ebx
; X86-NEXT:    shrl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, 28(%ecx)
; X86-NEXT:    movl %ebx, 24(%ecx)
; X86-NEXT:    movl %esi, 20(%ecx)
; X86-NEXT:    movl %edx, 16(%ecx)
; X86-NEXT:    movl %ebp, 12(%ecx)
; X86-NEXT:    movl %edi, 8(%ecx)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, 4(%ecx)
; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = lshr i256 %a0, 1
  ret i256 %r
}
; i256 arithmetic right shift by constant 1. Identical lowering to lshr_i256_1
; except the top limb uses sarq (x86-64) / sarl (i686) to replicate the sign bit.
define i256 @ashr_i256_1(i256 %a0) nounwind {
; CHECK-LABEL: ashr_i256_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    shrdq $1, %rdx, %rsi
; CHECK-NEXT:    shrdq $1, %rcx, %rdx
; CHECK-NEXT:    shrdq $1, %r8, %rcx
; CHECK-NEXT:    sarq %r8
; CHECK-NEXT:    movq %r8, 24(%rdi)
; CHECK-NEXT:    movq %rcx, 16(%rdi)
; CHECK-NEXT:    movq %rdx, 8(%rdi)
; CHECK-NEXT:    movq %rsi, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: ashr_i256_1:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    subl $8, %esp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl %edi, %esi
; X86-NEXT:    shldl $31, %eax, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    shrdl $1, %eax, %ecx
; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl $31, %eax, %ebp
; X86-NEXT:    shrdl $1, %eax, %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %ebx, %esi
; X86-NEXT:    shldl $31, %eax, %esi
; X86-NEXT:    shrdl $1, %eax, %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrdl $1, %eax, %ebx
; X86-NEXT:    sarl %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, 28(%ecx)
; X86-NEXT:    movl %ebx, 24(%ecx)
; X86-NEXT:    movl %esi, 20(%ecx)
; X86-NEXT:    movl %edx, 16(%ecx)
; X86-NEXT:    movl %ebp, 12(%ecx)
; X86-NEXT:    movl %edi, 8(%ecx)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, 4(%ecx)
; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    addl $8, %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = ashr i256 %a0, 1
  ret i256 %r
}
; i256 shift left by constant 200 (= 3*64 + 8): only the low limb survives,
; shifted into the top limb by 8 bits; the rest of the result is zeroed with
; vector stores (xorps/vxorps) and a movq $0.
define i256 @shl_i256_200(i256 %a0) nounwind {
; SSE-LABEL: shl_i256_200:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    shlq $8, %rsi
; SSE-NEXT:    movq %rsi, 24(%rdi)
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, (%rdi)
; SSE-NEXT:    movq $0, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX2-LABEL: shl_i256_200:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rdi, %rax
; AVX2-NEXT:    shlq $8, %rsi
; AVX2-NEXT:    movq %rsi, 24(%rdi)
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovaps %xmm0, (%rdi)
; AVX2-NEXT:    movq $0, 16(%rdi)
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shl_i256_200:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq %rdi, %rax
; AVX512-NEXT:    shlq $8, %rsi
; AVX512-NEXT:    movq %rsi, 24(%rdi)
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %xmm0, (%rdi)
; AVX512-NEXT:    movq $0, 16(%rdi)
; AVX512-NEXT:    retq
;
; X86-LABEL: shl_i256_200:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shll $8, %ecx
; X86-NEXT:    movl %edx, 28(%eax)
; X86-NEXT:    movl %ecx, 24(%eax)
; X86-NEXT:    movl $0, 20(%eax)
; X86-NEXT:    movl $0, 16(%eax)
; X86-NEXT:    movl $0, 12(%eax)
; X86-NEXT:    movl $0, 8(%eax)
; X86-NEXT:    movl $0, 4(%eax)
; X86-NEXT:    movl $0, (%eax)
; X86-NEXT:    retl $4
  %r = shl i256 %a0, 200
  ret i256 %r
}
; i256 logical right shift by constant 200: only the top limb survives, shifted
; down by 8 bits into the low limb; the upper part of the result is zero-filled.
define i256 @lshr_i256_200(i256 %a0) nounwind {
; SSE-LABEL: lshr_i256_200:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    shrq $8, %r8
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movups %xmm0, 8(%rdi)
; SSE-NEXT:    movq %r8, (%rdi)
; SSE-NEXT:    movq $0, 24(%rdi)
; SSE-NEXT:    retq
;
; AVX2-LABEL: lshr_i256_200:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rdi, %rax
; AVX2-NEXT:    shrq $8, %r8
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovups %xmm0, 8(%rdi)
; AVX2-NEXT:    movq %r8, (%rdi)
; AVX2-NEXT:    movq $0, 24(%rdi)
; AVX2-NEXT:    retq
;
; AVX512-LABEL: lshr_i256_200:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq %rdi, %rax
; AVX512-NEXT:    shrq $8, %r8
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %xmm0, 8(%rdi)
; AVX512-NEXT:    movq %r8, (%rdi)
; AVX512-NEXT:    movq $0, 24(%rdi)
; AVX512-NEXT:    retq
;
; X86-LABEL: lshr_i256_200:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shrl $8, %edx
; X86-NEXT:    movl %edx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl $0, 28(%eax)
; X86-NEXT:    movl $0, 24(%eax)
; X86-NEXT:    movl $0, 20(%eax)
; X86-NEXT:    movl $0, 16(%eax)
; X86-NEXT:    movl $0, 12(%eax)
; X86-NEXT:    movl $0, 8(%eax)
; X86-NEXT:    movl $0, (%eax)
; X86-NEXT:    retl $4
  %r = lshr i256 %a0, 200
  ret i256 %r
}
; i256 arithmetic right shift by constant 200: low limb comes from the top limb
; shifted by 8 (sarq $8 / sarl $8), all higher limbs are the replicated sign
; (sarq $63 / sarl $31). One CHECK body covers every x86-64 level.
define i256 @ashr_i256_200(i256 %a0) nounwind {
; CHECK-LABEL: ashr_i256_200:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    movq %r8, %rcx
; CHECK-NEXT:    sarq $8, %rcx
; CHECK-NEXT:    sarq $63, %r8
; CHECK-NEXT:    movq %r8, 24(%rdi)
; CHECK-NEXT:    movq %r8, 16(%rdi)
; CHECK-NEXT:    movq %r8, 8(%rdi)
; CHECK-NEXT:    movq %rcx, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: ashr_i256_200:
; X86:       # %bb.0:
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl %edx, %esi
; X86-NEXT:    sarl $8, %esi
; X86-NEXT:    sarl $31, %edx
; X86-NEXT:    movl %edx, 28(%eax)
; X86-NEXT:    movl %edx, 24(%eax)
; X86-NEXT:    movl %edx, 20(%eax)
; X86-NEXT:    movl %edx, 16(%eax)
; X86-NEXT:    movl %edx, 12(%eax)
; X86-NEXT:    movl %edx, 8(%eax)
; X86-NEXT:    movl %esi, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    popl %esi
; X86-NEXT:    retl $4
  %r = ashr i256 %a0, 200
  ret i256 %r
}
; i256 shift left by the maximum constant 255: only bit 0 of the input matters,
; moved to bit 255 via shlq $63 (shll $31 on i686) on the lowest limb; everything
; else in the result is zeroed.
define i256 @shl_i256_255(i256 %a0) nounwind {
; SSE-LABEL: shl_i256_255:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    shlq $63, %rsi
; SSE-NEXT:    movq %rsi, 24(%rdi)
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, (%rdi)
; SSE-NEXT:    movq $0, 16(%rdi)
; SSE-NEXT:    retq
;
; AVX2-LABEL: shl_i256_255:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rdi, %rax
; AVX2-NEXT:    shlq $63, %rsi
; AVX2-NEXT:    movq %rsi, 24(%rdi)
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovaps %xmm0, (%rdi)
; AVX2-NEXT:    movq $0, 16(%rdi)
; AVX2-NEXT:    retq
;
; AVX512-LABEL: shl_i256_255:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq %rdi, %rax
; AVX512-NEXT:    shlq $63, %rsi
; AVX512-NEXT:    movq %rsi, 24(%rdi)
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %xmm0, (%rdi)
; AVX512-NEXT:    movq $0, 16(%rdi)
; AVX512-NEXT:    retq
;
; X86-LABEL: shl_i256_255:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shll $31, %ecx
; X86-NEXT:    movl %ecx, 28(%eax)
; X86-NEXT:    movl $0, 24(%eax)
; X86-NEXT:    movl $0, 20(%eax)
; X86-NEXT:    movl $0, 16(%eax)
; X86-NEXT:    movl $0, 12(%eax)
; X86-NEXT:    movl $0, 8(%eax)
; X86-NEXT:    movl $0, 4(%eax)
; X86-NEXT:    movl $0, (%eax)
; X86-NEXT:    retl $4
  %r = shl i256 %a0, 255
  ret i256 %r
}
; i256 logical right shift by the maximum constant 255: the result is just the
; sign bit of the input as an unsigned value, extracted with shrq $63
; (shrl $31 on i686) from the top limb; all higher result limbs are zero.
define i256 @lshr_i256_255(i256 %a0) nounwind {
; SSE-LABEL: lshr_i256_255:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    shrq $63, %r8
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movups %xmm0, 8(%rdi)
; SSE-NEXT:    movq %r8, (%rdi)
; SSE-NEXT:    movq $0, 24(%rdi)
; SSE-NEXT:    retq
;
; AVX2-LABEL: lshr_i256_255:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rdi, %rax
; AVX2-NEXT:    shrq $63, %r8
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovups %xmm0, 8(%rdi)
; AVX2-NEXT:    movq %r8, (%rdi)
; AVX2-NEXT:    movq $0, 24(%rdi)
; AVX2-NEXT:    retq
;
; AVX512-LABEL: lshr_i256_255:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq %rdi, %rax
; AVX512-NEXT:    shrq $63, %r8
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %xmm0, 8(%rdi)
; AVX512-NEXT:    movq %r8, (%rdi)
; AVX512-NEXT:    movq $0, 24(%rdi)
; AVX512-NEXT:    retq
;
; X86-LABEL: lshr_i256_255:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrl $31, %ecx
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    movl $0, 28(%eax)
; X86-NEXT:    movl $0, 24(%eax)
; X86-NEXT:    movl $0, 20(%eax)
; X86-NEXT:    movl $0, 16(%eax)
; X86-NEXT:    movl $0, 12(%eax)
; X86-NEXT:    movl $0, 8(%eax)
; X86-NEXT:    movl $0, 4(%eax)
; X86-NEXT:    retl $4
  %r = lshr i256 %a0, 255
  ret i256 %r
}
; i256 arithmetic right shift by the maximum constant 255: every result limb is
; the replicated sign of the top limb — a single sarq $63 (sarl $31 on i686)
; broadcast to all output slots.
define i256 @ashr_i256_255(i256 %a0) nounwind {
; CHECK-LABEL: ashr_i256_255:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    sarq $63, %r8
; CHECK-NEXT:    movq %r8, 24(%rdi)
; CHECK-NEXT:    movq %r8, 16(%rdi)
; CHECK-NEXT:    movq %r8, 8(%rdi)
; CHECK-NEXT:    movq %r8, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: ashr_i256_255:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    sarl $31, %ecx
; X86-NEXT:    movl %ecx, 28(%eax)
; X86-NEXT:    movl %ecx, 24(%eax)
; X86-NEXT:    movl %ecx, 20(%eax)
; X86-NEXT:    movl %ecx, 16(%eax)
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl %ecx, 8(%eax)
; X86-NEXT:    movl %ecx, 4(%eax)
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    retl $4
  %r = ashr i256 %a0, 255
  ret i256 %r
}
; Variable shift of the constant 1: codegen spills a zero-padded copy of the
; constant to the stack, uses the shift amount's byte offset (negated, masked
; to the limb stride) to pick the starting limb, then funnel-shifts adjacent
; limbs with shld/shldl for the sub-limb part of the shift amount.
define i256 @shl_1_i256(i256 %a0) nounwind {
; SSE-LABEL: shl_1_i256:
; SSE: # %bb.0:
; SSE-NEXT: movq %rsi, %rcx
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: shrb $3, %al
; SSE-NEXT: andb $24, %al
; SSE-NEXT: negb %al
; SSE-NEXT: movsbq %al, %rax
; SSE-NEXT: movq -32(%rsp,%rax), %rdx
; SSE-NEXT: movq -24(%rsp,%rax), %rsi
; SSE-NEXT: movq %rsi, %r8
; SSE-NEXT: shldq %cl, %rdx, %r8
; SSE-NEXT: movq -16(%rsp,%rax), %r9
; SSE-NEXT: shldq %cl, %rsi, %r9
; SSE-NEXT: movq -40(%rsp,%rax), %rax
; SSE-NEXT: movq %rax, %rsi
; SSE-NEXT: shlq %cl, %rsi
; SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; SSE-NEXT: shldq %cl, %rax, %rdx
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq %r9, 24(%rdi)
; SSE-NEXT: movq %r8, 16(%rdi)
; SSE-NEXT: movq %rdx, 8(%rdi)
; SSE-NEXT: movq %rsi, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: shl_1_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rsi, %rcx
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = [1,0,0,0]
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrb $3, %al
; AVX2-NEXT: andb $24, %al
; AVX2-NEXT: negb %al
; AVX2-NEXT: movsbq %al, %rdx
; AVX2-NEXT: movq -32(%rsp,%rdx), %rsi
; AVX2-NEXT: movq -24(%rsp,%rdx), %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: shldq %cl, %rsi, %r8
; AVX2-NEXT: movq -16(%rsp,%rdx), %r9
; AVX2-NEXT: shldq %cl, %rax, %r9
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq -40(%rsp,%rdx), %rdx
; AVX2-NEXT: shlxq %rcx, %rdx, %rdi
; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; AVX2-NEXT: shldq %cl, %rdx, %rsi
; AVX2-NEXT: movq %r9, 24(%rax)
; AVX2-NEXT: movq %r8, 16(%rax)
; AVX2-NEXT: movq %rsi, 8(%rax)
; AVX2-NEXT: movq %rdi, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: shl_1_i256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %rsi, %rcx
; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,0,1,0,0,0]
; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %ecx, %eax
; AVX512F-NEXT: shrb $3, %al
; AVX512F-NEXT: andb $24, %al
; AVX512F-NEXT: negb %al
; AVX512F-NEXT: movsbq %al, %rdx
; AVX512F-NEXT: movq -32(%rsp,%rdx), %rsi
; AVX512F-NEXT: movq -24(%rsp,%rdx), %rax
; AVX512F-NEXT: movq %rax, %r8
; AVX512F-NEXT: shldq %cl, %rsi, %r8
; AVX512F-NEXT: movq -16(%rsp,%rdx), %r9
; AVX512F-NEXT: shldq %cl, %rax, %r9
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: movq -40(%rsp,%rdx), %rdx
; AVX512F-NEXT: shlxq %rcx, %rdx, %rdi
; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx
; AVX512F-NEXT: shldq %cl, %rdx, %rsi
; AVX512F-NEXT: movq %r9, 24(%rax)
; AVX512F-NEXT: movq %r8, 16(%rax)
; AVX512F-NEXT: movq %rsi, 8(%rax)
; AVX512F-NEXT: movq %rdi, (%rax)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shl_1_i256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rsi, %rcx
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0]
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %ecx, %eax
; AVX512VL-NEXT: shrb $3, %al
; AVX512VL-NEXT: andb $24, %al
; AVX512VL-NEXT: negb %al
; AVX512VL-NEXT: movsbq %al, %rax
; AVX512VL-NEXT: movq -32(%rsp,%rax), %rdx
; AVX512VL-NEXT: movq -24(%rsp,%rax), %rsi
; AVX512VL-NEXT: movq %rsi, %r8
; AVX512VL-NEXT: shldq %cl, %rdx, %r8
; AVX512VL-NEXT: movq -16(%rsp,%rax), %r9
; AVX512VL-NEXT: shldq %cl, %rsi, %r9
; AVX512VL-NEXT: movq -40(%rsp,%rax), %rsi
; AVX512VL-NEXT: shldq %cl, %rsi, %rdx
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: shlxq %rcx, %rsi, %rcx
; AVX512VL-NEXT: movq %r9, 24(%rdi)
; AVX512VL-NEXT: movq %r8, 16(%rdi)
; AVX512VL-NEXT: movq %rdx, 8(%rdi)
; AVX512VL-NEXT: movq %rcx, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: shl_1_i256:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %rsi, %rcx
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: vmovaps {{.*#+}} xmm0 = [1,0,0,0]
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movl %ecx, %eax
; AVX512VBMI-NEXT: shrb $3, %al
; AVX512VBMI-NEXT: andb $24, %al
; AVX512VBMI-NEXT: negb %al
; AVX512VBMI-NEXT: movsbq %al, %rax
; AVX512VBMI-NEXT: movq -32(%rsp,%rax), %rdx
; AVX512VBMI-NEXT: movq -24(%rsp,%rax), %rsi
; AVX512VBMI-NEXT: movq %rsi, %r8
; AVX512VBMI-NEXT: shldq %cl, %rdx, %r8
; AVX512VBMI-NEXT: movq -16(%rsp,%rax), %r9
; AVX512VBMI-NEXT: shldq %cl, %rsi, %r9
; AVX512VBMI-NEXT: movq -40(%rsp,%rax), %rsi
; AVX512VBMI-NEXT: shldq %cl, %rsi, %rdx
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: shlxq %rcx, %rsi, %rcx
; AVX512VBMI-NEXT: movq %r9, 24(%rdi)
; AVX512VBMI-NEXT: movq %r8, 16(%rdi)
; AVX512VBMI-NEXT: movq %rdx, 8(%rdi)
; AVX512VBMI-NEXT: movq %rcx, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; X86-LABEL: shl_1_i256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $28, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movl 68(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 72(%esp,%eax), %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 76(%esp,%eax), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: shldl %cl, %edx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 80(%esp,%eax), %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 84(%esp,%eax), %esi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: shldl %cl, %edx, %ebx
; X86-NEXT: movl 88(%esp,%eax), %edi
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: movl 64(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 92(%esp,%eax), %esi
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %esi, 28(%eax)
; X86-NEXT: movl %edx, 24(%eax)
; X86-NEXT: movl %ebx, 20(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %r = shl i256 1, %a0
  ret i256 %r
}
; Variable logical shift right of the sign-bit constant (1 << 255): the
; constant is spilled to the stack with zeros above it, the shift amount's
; limb index selects the load base, and shrd funnel-shifts handle the
; remaining sub-limb shift; the top limb uses a plain shr/shrx (zero fill).
define i256 @lshr_signbit_i256(i256 %a0) nounwind {
; SSE-LABEL: lshr_signbit_i256:
; SSE: # %bb.0:
; SSE-NEXT: movq %rsi, %rcx
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: shrb $6, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx
; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi
; SSE-NEXT: movq %rsi, %r8
; SSE-NEXT: shrdq %cl, %rdx, %r8
; SSE-NEXT: movq -48(%rsp,%rax,8), %r9
; SSE-NEXT: shrdq %cl, %r9, %rdx
; SSE-NEXT: movq -72(%rsp,%rax,8), %r10
; SSE-NEXT: shrdq %cl, %rsi, %r10
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; SSE-NEXT: shrq %cl, %r9
; SSE-NEXT: movq %r9, 24(%rdi)
; SSE-NEXT: movq %rdx, 16(%rdi)
; SSE-NEXT: movq %r8, 8(%rdi)
; SSE-NEXT: movq %r10, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: lshr_signbit_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rsi, %rcx
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808]
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrb $6, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX2-NEXT: movq %rsi, %r8
; AVX2-NEXT: shrdq %cl, %rdx, %r8
; AVX2-NEXT: movq -72(%rsp,%rax,8), %r9
; AVX2-NEXT: movq -48(%rsp,%rax,8), %r10
; AVX2-NEXT: shrdq %cl, %r10, %rdx
; AVX2-NEXT: shrdq %cl, %rsi, %r9
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: shrxq %rcx, %r10, %rcx
; AVX2-NEXT: movq %rcx, 24(%rdi)
; AVX2-NEXT: movq %rdx, 16(%rdi)
; AVX2-NEXT: movq %r8, 8(%rdi)
; AVX2-NEXT: movq %r9, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: lshr_signbit_i256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %rsi, %rcx
; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,9223372036854775808,0,0,0,0]
; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %ecx, %eax
; AVX512F-NEXT: shrb $6, %al
; AVX512F-NEXT: movzbl %al, %eax
; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512F-NEXT: movq %rsi, %r8
; AVX512F-NEXT: shrdq %cl, %rdx, %r8
; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r9
; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r10
; AVX512F-NEXT: shrdq %cl, %r10, %rdx
; AVX512F-NEXT: shrdq %cl, %rsi, %r9
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: shrxq %rcx, %r10, %rcx
; AVX512F-NEXT: movq %rcx, 24(%rdi)
; AVX512F-NEXT: movq %rdx, 16(%rdi)
; AVX512F-NEXT: movq %r8, 8(%rdi)
; AVX512F-NEXT: movq %r9, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: lshr_signbit_i256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rsi, %rcx
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808]
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %ecx, %eax
; AVX512VL-NEXT: shrb $6, %al
; AVX512VL-NEXT: movzbl %al, %eax
; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512VL-NEXT: movq %rsi, %r8
; AVX512VL-NEXT: shrdq %cl, %rdx, %r8
; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r9
; AVX512VL-NEXT: shrdq %cl, %r9, %rdx
; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r10
; AVX512VL-NEXT: shrdq %cl, %rsi, %r10
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: shrxq %rcx, %r9, %rcx
; AVX512VL-NEXT: movq %rcx, 24(%rdi)
; AVX512VL-NEXT: movq %rdx, 16(%rdi)
; AVX512VL-NEXT: movq %r8, 8(%rdi)
; AVX512VL-NEXT: movq %r10, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: lshr_signbit_i256:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %rsi, %rcx
; AVX512VBMI-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808]
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movl %ecx, %eax
; AVX512VBMI-NEXT: shrb $6, %al
; AVX512VBMI-NEXT: movzbl %al, %eax
; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512VBMI-NEXT: movq %rsi, %r8
; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8
; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r9
; AVX512VBMI-NEXT: shrdq %cl, %r9, %rdx
; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r10
; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r10
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: shrxq %rcx, %r9, %rcx
; AVX512VBMI-NEXT: movq %rcx, 24(%rdi)
; AVX512VBMI-NEXT: movq %rdx, 16(%rdi)
; AVX512VBMI-NEXT: movq %r8, 8(%rdi)
; AVX512VBMI-NEXT: movq %r10, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; X86-LABEL: lshr_signbit_i256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $5, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movl 40(%esp,%eax,4), %edx
; X86-NEXT: movl 36(%esp,%eax,4), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shrdl %cl, %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 44(%esp,%eax,4), %esi
; X86-NEXT: shrdl %cl, %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 48(%esp,%eax,4), %ebx
; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 52(%esp,%eax,4), %edi
; X86-NEXT: shrdl %cl, %edi, %ebx
; X86-NEXT: movl 56(%esp,%eax,4), %esi
; X86-NEXT: shrdl %cl, %esi, %edi
; X86-NEXT: movl 32(%esp,%eax,4), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 60(%esp,%eax,4), %edx
; X86-NEXT: shrdl %cl, %edx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shrl %cl, %edx
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, 28(%eax)
; X86-NEXT: movl %esi, 24(%eax)
; X86-NEXT: movl %edi, 20(%eax)
; X86-NEXT: movl %ebx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %s = shl i256 1, 255
  %r = lshr i256 %s, %a0
  ret i256 %r
}
; Variable arithmetic shift right of the sign-bit constant (1 << 255): same
; stack-spill + shrd funnel-shift scheme as lshr_signbit_i256, but the limbs
; above the value are pre-filled with -1 (sign extension) and the top limb
; uses sar/sarx instead of shr/shrx.
define i256 @ashr_signbit_i256(i256 %a0) nounwind {
; SSE-LABEL: ashr_signbit_i256:
; SSE: # %bb.0:
; SSE-NEXT: movq %rsi, %rcx
; SSE-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
; SSE-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: shrb $6, %al
; SSE-NEXT: movzbl %al, %eax
; SSE-NEXT: movq -56(%rsp,%rax,8), %rdx
; SSE-NEXT: movq -64(%rsp,%rax,8), %rsi
; SSE-NEXT: movq %rsi, %r8
; SSE-NEXT: shrdq %cl, %rdx, %r8
; SSE-NEXT: movq -48(%rsp,%rax,8), %r9
; SSE-NEXT: shrdq %cl, %r9, %rdx
; SSE-NEXT: movq -72(%rsp,%rax,8), %r10
; SSE-NEXT: shrdq %cl, %rsi, %r10
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; SSE-NEXT: sarq %cl, %r9
; SSE-NEXT: movq %r9, 24(%rdi)
; SSE-NEXT: movq %rdx, 16(%rdi)
; SSE-NEXT: movq %r8, 8(%rdi)
; SSE-NEXT: movq %r10, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: ashr_signbit_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rsi, %rcx
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808]
; AVX2-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrb $6, %al
; AVX2-NEXT: movzbl %al, %eax
; AVX2-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX2-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX2-NEXT: movq %rsi, %r8
; AVX2-NEXT: shrdq %cl, %rdx, %r8
; AVX2-NEXT: movq -72(%rsp,%rax,8), %r9
; AVX2-NEXT: movq -48(%rsp,%rax,8), %r10
; AVX2-NEXT: shrdq %cl, %r10, %rdx
; AVX2-NEXT: shrdq %cl, %rsi, %r9
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: sarxq %rcx, %r10, %rcx
; AVX2-NEXT: movq %rcx, 24(%rdi)
; AVX2-NEXT: movq %rdx, 16(%rdi)
; AVX2-NEXT: movq %r8, 8(%rdi)
; AVX2-NEXT: movq %r9, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: ashr_signbit_i256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %rsi, %rcx
; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,9223372036854775808,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615]
; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %ecx, %eax
; AVX512F-NEXT: shrb $6, %al
; AVX512F-NEXT: movzbl %al, %eax
; AVX512F-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512F-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512F-NEXT: movq %rsi, %r8
; AVX512F-NEXT: shrdq %cl, %rdx, %r8
; AVX512F-NEXT: movq -72(%rsp,%rax,8), %r9
; AVX512F-NEXT: movq -48(%rsp,%rax,8), %r10
; AVX512F-NEXT: shrdq %cl, %r10, %rdx
; AVX512F-NEXT: shrdq %cl, %rsi, %r9
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: sarxq %rcx, %r10, %rcx
; AVX512F-NEXT: movq %rcx, 24(%rdi)
; AVX512F-NEXT: movq %rdx, 16(%rdi)
; AVX512F-NEXT: movq %r8, 8(%rdi)
; AVX512F-NEXT: movq %r9, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: ashr_signbit_i256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rsi, %rcx
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808]
; AVX512VL-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %ecx, %eax
; AVX512VL-NEXT: shrb $6, %al
; AVX512VL-NEXT: movzbl %al, %eax
; AVX512VL-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512VL-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512VL-NEXT: movq %rsi, %r8
; AVX512VL-NEXT: shrdq %cl, %rdx, %r8
; AVX512VL-NEXT: movq -48(%rsp,%rax,8), %r9
; AVX512VL-NEXT: shrdq %cl, %r9, %rdx
; AVX512VL-NEXT: movq -72(%rsp,%rax,8), %r10
; AVX512VL-NEXT: shrdq %cl, %rsi, %r10
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: sarxq %rcx, %r9, %rcx
; AVX512VL-NEXT: movq %rcx, 24(%rdi)
; AVX512VL-NEXT: movq %rdx, 16(%rdi)
; AVX512VL-NEXT: movq %r8, 8(%rdi)
; AVX512VL-NEXT: movq %r10, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: ashr_signbit_i256:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %rsi, %rcx
; AVX512VBMI-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VBMI-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,9223372036854775808]
; AVX512VBMI-NEXT: vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movl %ecx, %eax
; AVX512VBMI-NEXT: shrb $6, %al
; AVX512VBMI-NEXT: movzbl %al, %eax
; AVX512VBMI-NEXT: movq -56(%rsp,%rax,8), %rdx
; AVX512VBMI-NEXT: movq -64(%rsp,%rax,8), %rsi
; AVX512VBMI-NEXT: movq %rsi, %r8
; AVX512VBMI-NEXT: shrdq %cl, %rdx, %r8
; AVX512VBMI-NEXT: movq -48(%rsp,%rax,8), %r9
; AVX512VBMI-NEXT: shrdq %cl, %r9, %rdx
; AVX512VBMI-NEXT: movq -72(%rsp,%rax,8), %r10
; AVX512VBMI-NEXT: shrdq %cl, %rsi, %r10
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: sarxq %rcx, %r9, %rcx
; AVX512VBMI-NEXT: movq %rcx, 24(%rdi)
; AVX512VBMI-NEXT: movq %rdx, 16(%rdi)
; AVX512VBMI-NEXT: movq %r8, 8(%rdi)
; AVX512VBMI-NEXT: movq %r10, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; X86-LABEL: ashr_signbit_i256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-2147483648, {{[0-9]+}}(%esp) # imm = 0x80000000
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $5, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: movl 40(%esp,%eax,4), %edx
; X86-NEXT: movl 36(%esp,%eax,4), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: shrdl %cl, %edx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 44(%esp,%eax,4), %esi
; X86-NEXT: shrdl %cl, %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 48(%esp,%eax,4), %ebx
; X86-NEXT: shrdl %cl, %ebx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 52(%esp,%eax,4), %edi
; X86-NEXT: shrdl %cl, %edi, %ebx
; X86-NEXT: movl 56(%esp,%eax,4), %esi
; X86-NEXT: shrdl %cl, %esi, %edi
; X86-NEXT: movl 32(%esp,%eax,4), %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 60(%esp,%eax,4), %edx
; X86-NEXT: shrdl %cl, %edx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: sarl %cl, %edx
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %edx, 28(%eax)
; X86-NEXT: movl %esi, 24(%eax)
; X86-NEXT: movl %edi, 20(%eax)
; X86-NEXT: movl %ebx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, 4(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %s = shl i256 1, 255
  %r = ashr i256 %s, %a0
  ret i256 %r
}
; Variable shift left of all-ones (-1): the constant is spilled to the stack
; with zeros below it (so vacated low limbs read as zero), the negated byte
; offset of the shift amount selects the starting limb, and shld/shldl
; funnel-shifts handle the sub-limb remainder of the shift.
define i256 @shl_allbits_i256(i256 %a0) nounwind {
; SSE-LABEL: shl_allbits_i256:
; SSE: # %bb.0:
; SSE-NEXT: movq %rsi, %rcx
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movl %ecx, %eax
; SSE-NEXT: shrb $3, %al
; SSE-NEXT: andb $24, %al
; SSE-NEXT: negb %al
; SSE-NEXT: movsbq %al, %rax
; SSE-NEXT: movq -32(%rsp,%rax), %rdx
; SSE-NEXT: movq -24(%rsp,%rax), %rsi
; SSE-NEXT: movq %rsi, %r8
; SSE-NEXT: shldq %cl, %rdx, %r8
; SSE-NEXT: movq -16(%rsp,%rax), %r9
; SSE-NEXT: shldq %cl, %rsi, %r9
; SSE-NEXT: movq -40(%rsp,%rax), %rax
; SSE-NEXT: movq %rax, %rsi
; SSE-NEXT: shlq %cl, %rsi
; SSE-NEXT: # kill: def $cl killed $cl killed $rcx
; SSE-NEXT: shldq %cl, %rax, %rdx
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movq %r9, 24(%rdi)
; SSE-NEXT: movq %r8, 16(%rdi)
; SSE-NEXT: movq %rdx, 8(%rdi)
; SSE-NEXT: movq %rsi, (%rdi)
; SSE-NEXT: retq
;
; AVX2-LABEL: shl_allbits_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: movq %rsi, %rcx
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrb $3, %al
; AVX2-NEXT: andb $24, %al
; AVX2-NEXT: negb %al
; AVX2-NEXT: movsbq %al, %rdx
; AVX2-NEXT: movq -32(%rsp,%rdx), %rsi
; AVX2-NEXT: movq -24(%rsp,%rdx), %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: shldq %cl, %rsi, %r8
; AVX2-NEXT: movq -16(%rsp,%rdx), %r9
; AVX2-NEXT: shldq %cl, %rax, %r9
; AVX2-NEXT: movq %rdi, %rax
; AVX2-NEXT: movq -40(%rsp,%rdx), %rdx
; AVX2-NEXT: shlxq %rcx, %rdx, %rdi
; AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; AVX2-NEXT: shldq %cl, %rdx, %rsi
; AVX2-NEXT: movq %r9, 24(%rax)
; AVX2-NEXT: movq %r8, 16(%rax)
; AVX2-NEXT: movq %rsi, 8(%rax)
; AVX2-NEXT: movq %rdi, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: shl_allbits_i256:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movq %rsi, %rcx
; AVX512F-NEXT: vmovaps {{.*#+}} zmm0 = [0,0,0,0,18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615]
; AVX512F-NEXT: vmovups %zmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT: movl %ecx, %eax
; AVX512F-NEXT: shrb $3, %al
; AVX512F-NEXT: andb $24, %al
; AVX512F-NEXT: negb %al
; AVX512F-NEXT: movsbq %al, %rdx
; AVX512F-NEXT: movq -32(%rsp,%rdx), %rsi
; AVX512F-NEXT: movq -24(%rsp,%rdx), %rax
; AVX512F-NEXT: movq %rax, %r8
; AVX512F-NEXT: shldq %cl, %rsi, %r8
; AVX512F-NEXT: movq -16(%rsp,%rdx), %r9
; AVX512F-NEXT: shldq %cl, %rax, %r9
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: movq -40(%rsp,%rdx), %rdx
; AVX512F-NEXT: shlxq %rcx, %rdx, %rdi
; AVX512F-NEXT: # kill: def $cl killed $cl killed $rcx
; AVX512F-NEXT: shldq %cl, %rdx, %rsi
; AVX512F-NEXT: movq %r9, 24(%rax)
; AVX512F-NEXT: movq %r8, 16(%rax)
; AVX512F-NEXT: movq %rsi, 8(%rax)
; AVX512F-NEXT: movq %rdi, (%rax)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: shl_allbits_i256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: movq %rsi, %rcx
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT: movl %ecx, %eax
; AVX512VL-NEXT: shrb $3, %al
; AVX512VL-NEXT: andb $24, %al
; AVX512VL-NEXT: negb %al
; AVX512VL-NEXT: movsbq %al, %rax
; AVX512VL-NEXT: movq -32(%rsp,%rax), %rdx
; AVX512VL-NEXT: movq -24(%rsp,%rax), %rsi
; AVX512VL-NEXT: movq %rsi, %r8
; AVX512VL-NEXT: shldq %cl, %rdx, %r8
; AVX512VL-NEXT: movq -16(%rsp,%rax), %r9
; AVX512VL-NEXT: shldq %cl, %rsi, %r9
; AVX512VL-NEXT: movq -40(%rsp,%rax), %rsi
; AVX512VL-NEXT: shldq %cl, %rsi, %rdx
; AVX512VL-NEXT: movq %rdi, %rax
; AVX512VL-NEXT: shlxq %rcx, %rsi, %rcx
; AVX512VL-NEXT: movq %r9, 24(%rdi)
; AVX512VL-NEXT: movq %r8, 16(%rdi)
; AVX512VL-NEXT: movq %rdx, 8(%rdi)
; AVX512VL-NEXT: movq %rcx, (%rdi)
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512VBMI-LABEL: shl_allbits_i256:
; AVX512VBMI: # %bb.0:
; AVX512VBMI-NEXT: movq %rsi, %rcx
; AVX512VBMI-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VBMI-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT: vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT: movl %ecx, %eax
; AVX512VBMI-NEXT: shrb $3, %al
; AVX512VBMI-NEXT: andb $24, %al
; AVX512VBMI-NEXT: negb %al
; AVX512VBMI-NEXT: movsbq %al, %rax
; AVX512VBMI-NEXT: movq -32(%rsp,%rax), %rdx
; AVX512VBMI-NEXT: movq -24(%rsp,%rax), %rsi
; AVX512VBMI-NEXT: movq %rsi, %r8
; AVX512VBMI-NEXT: shldq %cl, %rdx, %r8
; AVX512VBMI-NEXT: movq -16(%rsp,%rax), %r9
; AVX512VBMI-NEXT: shldq %cl, %rsi, %r9
; AVX512VBMI-NEXT: movq -40(%rsp,%rax), %rsi
; AVX512VBMI-NEXT: shldq %cl, %rsi, %rdx
; AVX512VBMI-NEXT: movq %rdi, %rax
; AVX512VBMI-NEXT: shlxq %rcx, %rsi, %rcx
; AVX512VBMI-NEXT: movq %r9, 24(%rdi)
; AVX512VBMI-NEXT: movq %r8, 16(%rdi)
; AVX512VBMI-NEXT: movq %rdx, 8(%rdi)
; AVX512VBMI-NEXT: movq %rcx, (%rdi)
; AVX512VBMI-NEXT: vzeroupper
; AVX512VBMI-NEXT: retq
;
; X86-LABEL: shl_allbits_i256:
; X86: # %bb.0:
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $112, %esp
; X86-NEXT: movl 12(%ebp), %ecx
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl $0, {{[0-9]+}}(%esp)
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $28, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: movl 68(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 72(%esp,%eax), %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 76(%esp,%eax), %esi
; X86-NEXT: movl %esi, %edi
; X86-NEXT: shldl %cl, %edx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 80(%esp,%eax), %edx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: shldl %cl, %esi, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 84(%esp,%eax), %esi
; X86-NEXT: movl %esi, %ebx
; X86-NEXT: shldl %cl, %edx, %ebx
; X86-NEXT: movl 88(%esp,%eax), %edi
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shldl %cl, %esi, %edx
; X86-NEXT: movl 64(%esp,%eax), %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 92(%esp,%eax), %esi
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: movl %esi, 28(%eax)
; X86-NEXT: movl %edx, 24(%eax)
; X86-NEXT: movl %ebx, 20(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 16(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl %edx, 8(%eax)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: movl %edi, %edx
; X86-NEXT: shll %cl, %edx
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: shldl %cl, %edi, %esi
; X86-NEXT: movl %esi, 4(%eax)
; X86-NEXT: movl %edx, (%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
; X86-NEXT: popl %ebp
; X86-NEXT: retl $4
  %r = shl i256 -1, %a0
  ret i256 %r
}
; lshr of the all-ones constant (-1) by a variable amount %a0.
; The CHECK lines show the generic wide-shift lowering: a 512-bit
; {-1,-1,-1,-1, 0,0,0,0} pattern is spilled to the stack, the shift amount's
; high bits select a word-aligned load offset (shrb $6 on x86-64 / $5 on X86),
; and the sub-word remainder is applied with shrd/shrx funnel shifts.
define i256 @lshr_allbits_i256(i256 %a0) nounwind {
; SSE-LABEL: lshr_allbits_i256:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rsi, %rcx
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $-1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrb $6, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movq -56(%rsp,%rax,8), %rdx
; SSE-NEXT:    movq -64(%rsp,%rax,8), %rsi
; SSE-NEXT:    movq %rsi, %r8
; SSE-NEXT:    shrdq %cl, %rdx, %r8
; SSE-NEXT:    movq -48(%rsp,%rax,8), %r9
; SSE-NEXT:    shrdq %cl, %r9, %rdx
; SSE-NEXT:    movq -72(%rsp,%rax,8), %r10
; SSE-NEXT:    shrdq %cl, %rsi, %r10
; SSE-NEXT:    movq %rdi, %rax
; SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
; SSE-NEXT:    shrq %cl, %r9
; SSE-NEXT:    movq %r9, 24(%rdi)
; SSE-NEXT:    movq %rdx, 16(%rdi)
; SSE-NEXT:    movq %r8, 8(%rdi)
; SSE-NEXT:    movq %r10, (%rdi)
; SSE-NEXT:    retq
;
; AVX2-LABEL: lshr_allbits_i256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rsi, %rcx
; AVX2-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrb $6, %al
; AVX2-NEXT:    movzbl %al, %eax
; AVX2-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX2-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX2-NEXT:    movq %rsi, %r8
; AVX2-NEXT:    shrdq %cl, %rdx, %r8
; AVX2-NEXT:    movq -72(%rsp,%rax,8), %r9
; AVX2-NEXT:    movq -48(%rsp,%rax,8), %r10
; AVX2-NEXT:    shrdq %cl, %r10, %rdx
; AVX2-NEXT:    shrdq %cl, %rsi, %r9
; AVX2-NEXT:    movq %rdi, %rax
; AVX2-NEXT:    shrxq %rcx, %r10, %rcx
; AVX2-NEXT:    movq %rcx, 24(%rdi)
; AVX2-NEXT:    movq %rdx, 16(%rdi)
; AVX2-NEXT:    movq %r8, 8(%rdi)
; AVX2-NEXT:    movq %r9, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: lshr_allbits_i256:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rsi, %rcx
; AVX512F-NEXT:    vmovaps {{.*#+}} zmm0 = [18446744073709551615,18446744073709551615,18446744073709551615,18446744073709551615,0,0,0,0]
; AVX512F-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movl %ecx, %eax
; AVX512F-NEXT:    shrb $6, %al
; AVX512F-NEXT:    movzbl %al, %eax
; AVX512F-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512F-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512F-NEXT:    movq %rsi, %r8
; AVX512F-NEXT:    shrdq %cl, %rdx, %r8
; AVX512F-NEXT:    movq -72(%rsp,%rax,8), %r9
; AVX512F-NEXT:    movq -48(%rsp,%rax,8), %r10
; AVX512F-NEXT:    shrdq %cl, %r10, %rdx
; AVX512F-NEXT:    shrdq %cl, %rsi, %r9
; AVX512F-NEXT:    movq %rdi, %rax
; AVX512F-NEXT:    shrxq %rcx, %r10, %rcx
; AVX512F-NEXT:    movq %rcx, 24(%rdi)
; AVX512F-NEXT:    movq %rdx, 16(%rdi)
; AVX512F-NEXT:    movq %r8, 8(%rdi)
; AVX512F-NEXT:    movq %r9, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: lshr_allbits_i256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rsi, %rcx
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movl %ecx, %eax
; AVX512VL-NEXT:    shrb $6, %al
; AVX512VL-NEXT:    movzbl %al, %eax
; AVX512VL-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512VL-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512VL-NEXT:    movq %rsi, %r8
; AVX512VL-NEXT:    shrdq %cl, %rdx, %r8
; AVX512VL-NEXT:    movq -48(%rsp,%rax,8), %r9
; AVX512VL-NEXT:    shrdq %cl, %r9, %rdx
; AVX512VL-NEXT:    movq -72(%rsp,%rax,8), %r10
; AVX512VL-NEXT:    shrdq %cl, %rsi, %r10
; AVX512VL-NEXT:    movq %rdi, %rax
; AVX512VL-NEXT:    shrxq %rcx, %r9, %rcx
; AVX512VL-NEXT:    movq %rcx, 24(%rdi)
; AVX512VL-NEXT:    movq %rdx, 16(%rdi)
; AVX512VL-NEXT:    movq %r8, 8(%rdi)
; AVX512VL-NEXT:    movq %r10, (%rdi)
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512VBMI-LABEL: lshr_allbits_i256:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT:    vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rsi, %rcx
; AVX512VBMI-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VBMI-NEXT:    vmovdqu %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movl %ecx, %eax
; AVX512VBMI-NEXT:    shrb $6, %al
; AVX512VBMI-NEXT:    movzbl %al, %eax
; AVX512VBMI-NEXT:    movq -56(%rsp,%rax,8), %rdx
; AVX512VBMI-NEXT:    movq -64(%rsp,%rax,8), %rsi
; AVX512VBMI-NEXT:    movq %rsi, %r8
; AVX512VBMI-NEXT:    shrdq %cl, %rdx, %r8
; AVX512VBMI-NEXT:    movq -48(%rsp,%rax,8), %r9
; AVX512VBMI-NEXT:    shrdq %cl, %r9, %rdx
; AVX512VBMI-NEXT:    movq -72(%rsp,%rax,8), %r10
; AVX512VBMI-NEXT:    shrdq %cl, %rsi, %r10
; AVX512VBMI-NEXT:    movq %rdi, %rax
; AVX512VBMI-NEXT:    shrxq %rcx, %r9, %rcx
; AVX512VBMI-NEXT:    movq %rcx, 24(%rdi)
; AVX512VBMI-NEXT:    movq %rdx, 16(%rdi)
; AVX512VBMI-NEXT:    movq %r8, 8(%rdi)
; AVX512VBMI-NEXT:    movq %r10, (%rdi)
; AVX512VBMI-NEXT:    vzeroupper
; AVX512VBMI-NEXT:    retq
;
; X86-LABEL: lshr_allbits_i256:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $112, %esp
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $-1, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    movl 40(%esp,%eax,4), %edx
; X86-NEXT:    movl 36(%esp,%eax,4), %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    shrdl %cl, %edx, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 44(%esp,%eax,4), %esi
; X86-NEXT:    shrdl %cl, %esi, %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 48(%esp,%eax,4), %ebx
; X86-NEXT:    shrdl %cl, %ebx, %esi
; X86-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 52(%esp,%eax,4), %edi
; X86-NEXT:    shrdl %cl, %edi, %ebx
; X86-NEXT:    movl 56(%esp,%eax,4), %esi
; X86-NEXT:    shrdl %cl, %esi, %edi
; X86-NEXT:    movl 32(%esp,%eax,4), %edx
; X86-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 60(%esp,%eax,4), %edx
; X86-NEXT:    shrdl %cl, %edx, %esi
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    shrdl %cl, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NEXT:    shrl %cl, %edx
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl %edx, 28(%eax)
; X86-NEXT:    movl %esi, 24(%eax)
; X86-NEXT:    movl %edi, 20(%eax)
; X86-NEXT:    movl %ebx, 16(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 12(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 8(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, 4(%eax)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, (%eax)
; X86-NEXT:    leal -12(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl $4
  %r = lshr i256 -1, %a0
  ret i256 %r
}
; lshr of a variable i256 truncated to i64: only the low 64 bits of the shifted
; value are live. The CHECK lines show the operand spilled to a zero-padded
; stack slot, then a single pair of adjacent qword loads (selected by the shift
; amount's word index) combined with one shrd — the other shifted words are
; never computed.
define i64 @lshr_extract_i256_i64(i256 %a0, i256 %a1) nounwind {
; SSE-LABEL: lshr_extract_i256_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movl %r8d, %eax
; SSE-NEXT:    shrb $6, %al
; SSE-NEXT:    movzbl %al, %ecx
; SSE-NEXT:    movq -72(%rsp,%rcx,8), %rax
; SSE-NEXT:    movq -64(%rsp,%rcx,8), %rdx
; SSE-NEXT:    movl %r8d, %ecx
; SSE-NEXT:    shrdq %cl, %rdx, %rax
; SSE-NEXT:    retq
;
; AVX2-LABEL: lshr_extract_i256_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movl %r8d, %eax
; AVX2-NEXT:    shrb $6, %al
; AVX2-NEXT:    movzbl %al, %ecx
; AVX2-NEXT:    movq -72(%rsp,%rcx,8), %rax
; AVX2-NEXT:    movq -64(%rsp,%rcx,8), %rdx
; AVX2-NEXT:    movl %r8d, %ecx
; AVX2-NEXT:    shrdq %cl, %rdx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: lshr_extract_i256_i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movl %r8d, %eax
; AVX512F-NEXT:    shrb $6, %al
; AVX512F-NEXT:    movzbl %al, %ecx
; AVX512F-NEXT:    movq -72(%rsp,%rcx,8), %rax
; AVX512F-NEXT:    movq -64(%rsp,%rcx,8), %rdx
; AVX512F-NEXT:    movl %r8d, %ecx
; AVX512F-NEXT:    shrdq %cl, %rdx, %rax
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: lshr_extract_i256_i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movl %r8d, %eax
; AVX512VL-NEXT:    shrb $6, %al
; AVX512VL-NEXT:    movzbl %al, %ecx
; AVX512VL-NEXT:    movq -72(%rsp,%rcx,8), %rax
; AVX512VL-NEXT:    movq -64(%rsp,%rcx,8), %rdx
; AVX512VL-NEXT:    movl %r8d, %ecx
; AVX512VL-NEXT:    shrdq %cl, %rdx, %rax
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512VBMI-LABEL: lshr_extract_i256_i64:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512VBMI-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movl %r8d, %eax
; AVX512VBMI-NEXT:    shrb $6, %al
; AVX512VBMI-NEXT:    movzbl %al, %ecx
; AVX512VBMI-NEXT:    movq -72(%rsp,%rcx,8), %rax
; AVX512VBMI-NEXT:    movq -64(%rsp,%rcx,8), %rdx
; AVX512VBMI-NEXT:    movl %r8d, %ecx
; AVX512VBMI-NEXT:    shrdq %cl, %rdx, %rax
; AVX512VBMI-NEXT:    vzeroupper
; AVX512VBMI-NEXT:    retq
;
; X86-LABEL: lshr_extract_i256_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    movl 40(%ebp), %ecx
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %edx
; X86-NEXT:    movl 16(%ebp), %esi
; X86-NEXT:    movl 36(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 32(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 28(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 24(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 20(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    movzbl %al, %edx
; X86-NEXT:    movl 8(%esp,%edx,4), %esi
; X86-NEXT:    movl (%esp,%edx,4), %eax
; X86-NEXT:    movl 4(%esp,%edx,4), %edi
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    shrdl %cl, %esi, %edx
; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NEXT:    shrdl %cl, %edi, %eax
; X86-NEXT:    leal -8(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %b = lshr i256 %a0, %a1
  %r = trunc i256 %b to i64
  ret i64 %r
}
; ashr variant of the i64-extract test: identical structure to
; lshr_extract_i256_i64, except the four padding slots above the spilled value
; are filled with the sign word (sarq $63 / sarl $31 of the top word) instead
; of zeros. All 64-bit configurations share one CHECK-prefixed sequence here.
define i64 @ashr_extract_i256_i64(i256 %a0, i256 %a1) nounwind {
; CHECK-LABEL: ashr_extract_i256_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movq %rsi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    sarq $63, %rcx
; CHECK-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movl %r8d, %eax
; CHECK-NEXT:    shrb $6, %al
; CHECK-NEXT:    movzbl %al, %ecx
; CHECK-NEXT:    movq -72(%rsp,%rcx,8), %rax
; CHECK-NEXT:    movq -64(%rsp,%rcx,8), %rdx
; CHECK-NEXT:    movl %r8d, %ecx
; CHECK-NEXT:    shrdq %cl, %rdx, %rax
; CHECK-NEXT:    retq
;
; X86-LABEL: ashr_extract_i256_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    movl 40(%ebp), %ecx
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %edx
; X86-NEXT:    movl 16(%ebp), %esi
; X86-NEXT:    movl 32(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 28(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 24(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 20(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 36(%ebp), %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    sarl $31, %edi
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    movzbl %al, %edx
; X86-NEXT:    movl 8(%esp,%edx,4), %esi
; X86-NEXT:    movl (%esp,%edx,4), %eax
; X86-NEXT:    movl 4(%esp,%edx,4), %edi
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    shrdl %cl, %esi, %edx
; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NEXT:    shrdl %cl, %edi, %eax
; X86-NEXT:    leal -8(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %b = ashr i256 %a0, %a1
  %r = trunc i256 %b to i64
  ret i64 %r
}
; Same as lshr_extract_i256_i64 but the i256 operand comes from memory: the
; CHECK lines verify the value is copied through vector registers (movaps /
; vmovups) into a zero-padded stack slot before the indexed qword loads and
; the single shrd that produces the low 64 bits.
define i64 @lshr_extract_load_i256_i64(ptr %p0, i256 %a1) nounwind {
; SSE-LABEL: lshr_extract_load_i256_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rsi, %rcx
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm2, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrb $6, %al
; SSE-NEXT:    movzbl %al, %edx
; SSE-NEXT:    movq -72(%rsp,%rdx,8), %rax
; SSE-NEXT:    movq -64(%rsp,%rdx,8), %rdx
; SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
; SSE-NEXT:    shrdq %cl, %rdx, %rax
; SSE-NEXT:    retq
;
; AVX2-LABEL: lshr_extract_load_i256_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rsi, %rcx
; AVX2-NEXT:    vmovups (%rdi), %ymm0
; AVX2-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrb $6, %al
; AVX2-NEXT:    movzbl %al, %edx
; AVX2-NEXT:    movq -72(%rsp,%rdx,8), %rax
; AVX2-NEXT:    movq -64(%rsp,%rdx,8), %rdx
; AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; AVX2-NEXT:    shrdq %cl, %rdx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: lshr_extract_load_i256_i64:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    movq %rsi, %rcx
; AVX512F-NEXT:    vmovups (%rdi), %ymm0
; AVX512F-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512F-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    movl %ecx, %eax
; AVX512F-NEXT:    shrb $6, %al
; AVX512F-NEXT:    movzbl %al, %edx
; AVX512F-NEXT:    movq -72(%rsp,%rdx,8), %rax
; AVX512F-NEXT:    movq -64(%rsp,%rdx,8), %rdx
; AVX512F-NEXT:    # kill: def $cl killed $cl killed $rcx
; AVX512F-NEXT:    shrdq %cl, %rdx, %rax
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: lshr_extract_load_i256_i64:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    movq %rsi, %rcx
; AVX512VL-NEXT:    vmovups (%rdi), %ymm0
; AVX512VL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VL-NEXT:    movl %ecx, %eax
; AVX512VL-NEXT:    shrb $6, %al
; AVX512VL-NEXT:    movzbl %al, %edx
; AVX512VL-NEXT:    movq -72(%rsp,%rdx,8), %rax
; AVX512VL-NEXT:    movq -64(%rsp,%rdx,8), %rdx
; AVX512VL-NEXT:    # kill: def $cl killed $cl killed $rcx
; AVX512VL-NEXT:    shrdq %cl, %rdx, %rax
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    retq
;
; AVX512VBMI-LABEL: lshr_extract_load_i256_i64:
; AVX512VBMI:       # %bb.0:
; AVX512VBMI-NEXT:    movq %rsi, %rcx
; AVX512VBMI-NEXT:    vmovups (%rdi), %ymm0
; AVX512VBMI-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX512VBMI-NEXT:    vmovups %ymm1, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
; AVX512VBMI-NEXT:    movl %ecx, %eax
; AVX512VBMI-NEXT:    shrb $6, %al
; AVX512VBMI-NEXT:    movzbl %al, %edx
; AVX512VBMI-NEXT:    movq -72(%rsp,%rdx,8), %rax
; AVX512VBMI-NEXT:    movq -64(%rsp,%rdx,8), %rdx
; AVX512VBMI-NEXT:    # kill: def $cl killed $cl killed $rcx
; AVX512VBMI-NEXT:    shrdq %cl, %rdx, %rax
; AVX512VBMI-NEXT:    vzeroupper
; AVX512VBMI-NEXT:    retq
;
; X86-LABEL: lshr_extract_load_i256_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $96, %esp
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movl (%ecx), %eax
; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 4(%ecx), %eax
; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 8(%ecx), %esi
; X86-NEXT:    movl 12(%ecx), %edi
; X86-NEXT:    movl 16(%ecx), %ebx
; X86-NEXT:    movl 20(%ecx), %edx
; X86-NEXT:    movl 24(%ecx), %eax
; X86-NEXT:    movl 28(%ecx), %ecx
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    movzbl %al, %edx
; X86-NEXT:    movl 24(%esp,%edx,4), %esi
; X86-NEXT:    movl 16(%esp,%edx,4), %eax
; X86-NEXT:    movl 20(%esp,%edx,4), %edi
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    shrdl %cl, %esi, %edx
; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NEXT:    shrdl %cl, %edi, %eax
; X86-NEXT:    leal -12(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a0 = load i256, ptr %p0
  %b = lshr i256 %a0, %a1
  %r = trunc i256 %b to i64
  ret i64 %r
}
; ashr of a loaded i256 truncated to i64: like lshr_extract_load_i256_i64,
; but the top qword is also loaded into a scalar register so its sign
; (sarq $63 / sarl $31) can fill the padding slots above the spilled value.
; All AVX-512 configurations share the AVX512 prefix here.
define i64 @ashr_extract_load_i256_i64(ptr %p0, i256 %a1) nounwind {
; SSE-LABEL: ashr_extract_load_i256_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rsi, %rcx
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movq 16(%rdi), %rax
; SSE-NEXT:    movq 24(%rdi), %rdx
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    sarq $63, %rdx
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movl %ecx, %eax
; SSE-NEXT:    shrb $6, %al
; SSE-NEXT:    movzbl %al, %edx
; SSE-NEXT:    movq -72(%rsp,%rdx,8), %rax
; SSE-NEXT:    movq -64(%rsp,%rdx,8), %rdx
; SSE-NEXT:    # kill: def $cl killed $cl killed $rcx
; SSE-NEXT:    shrdq %cl, %rdx, %rax
; SSE-NEXT:    retq
;
; AVX2-LABEL: ashr_extract_load_i256_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq %rsi, %rcx
; AVX2-NEXT:    vmovaps (%rdi), %xmm0
; AVX2-NEXT:    movq 16(%rdi), %rax
; AVX2-NEXT:    movq 24(%rdi), %rdx
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    sarq $63, %rdx
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrb $6, %al
; AVX2-NEXT:    movzbl %al, %edx
; AVX2-NEXT:    movq -72(%rsp,%rdx,8), %rax
; AVX2-NEXT:    movq -64(%rsp,%rdx,8), %rdx
; AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
; AVX2-NEXT:    shrdq %cl, %rdx, %rax
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ashr_extract_load_i256_i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq %rsi, %rcx
; AVX512-NEXT:    vmovaps (%rdi), %xmm0
; AVX512-NEXT:    movq 16(%rdi), %rax
; AVX512-NEXT:    movq 24(%rdi), %rdx
; AVX512-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    sarq $63, %rdx
; AVX512-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movl %ecx, %eax
; AVX512-NEXT:    shrb $6, %al
; AVX512-NEXT:    movzbl %al, %edx
; AVX512-NEXT:    movq -72(%rsp,%rdx,8), %rax
; AVX512-NEXT:    movq -64(%rsp,%rdx,8), %rdx
; AVX512-NEXT:    # kill: def $cl killed $cl killed $rcx
; AVX512-NEXT:    shrdq %cl, %rdx, %rax
; AVX512-NEXT:    retq
;
; X86-LABEL: ashr_extract_load_i256_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $96, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl (%eax), %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 4(%eax), %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 8(%eax), %edi
; X86-NEXT:    movl 12(%eax), %esi
; X86-NEXT:    movl 16(%eax), %ebx
; X86-NEXT:    movl 20(%eax), %edx
; X86-NEXT:    movl 24(%eax), %ecx
; X86-NEXT:    movl 28(%eax), %eax
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl 12(%ebp), %ecx
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    sarl $31, %eax
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    shrb $5, %al
; X86-NEXT:    movzbl %al, %edx
; X86-NEXT:    movl 24(%esp,%edx,4), %esi
; X86-NEXT:    movl 16(%esp,%edx,4), %eax
; X86-NEXT:    movl 20(%esp,%edx,4), %edi
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    shrdl %cl, %esi, %edx
; X86-NEXT:    # kill: def $cl killed $cl killed $ecx
; X86-NEXT:    shrdl %cl, %edi, %eax
; X86-NEXT:    leal -12(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a0 = load i256, ptr %p0
  %b = ashr i256 %a0, %a1
  %r = trunc i256 %b to i64
  ret i64 %r
}
; Shift amount is a multiple of 64 (%a1 * 64), so the lshr+trunc reduces to a
; word select: the CHECK lines verify codegen folds the whole sequence to a
; masked index (andl $3) and a single qword load directly from %p0 — no stack
; spill and no shrd needed.
define i64 @lshr_extract_idx_load_i256_i64(ptr %p0, i256 %a1) nounwind {
; CHECK-LABEL: lshr_extract_idx_load_i256_i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    andl $3, %esi
; CHECK-NEXT:    movq (%rdi,%rsi,8), %rax
; CHECK-NEXT:    retq
;
; X86-LABEL: lshr_extract_idx_load_i256_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    andl $3, %edx
; X86-NEXT:    movl (%ecx,%edx,8), %eax
; X86-NEXT:    movl 4(%ecx,%edx,8), %edx
; X86-NEXT:    retl
  %a0 = load i256, ptr %p0
  %m1 = mul i256 %a1, 64
  %b = lshr i256 %a0, %m1
  %r = trunc i256 %b to i64
  ret i64 %r
}
; ashr counterpart of the idx-load test. Unlike the lshr version, the CHECK
; lines show this is NOT folded to a direct indexed load: the value is still
; spilled with sign-word padding (sarq $63 / sarl $31) and the result is read
; back with a masked qword index (andb/andl $3) — sub-word shifts are avoided,
; but the stack round-trip remains.
define i64 @ashr_extract_idx_load_i256_i64(ptr %p0, i256 %a1) nounwind {
; SSE-LABEL: ashr_extract_idx_load_i256_i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movq 16(%rdi), %rax
; SSE-NEXT:    movq 24(%rdi), %rcx
; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    sarq $63, %rcx
; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andb $3, %sil
; SSE-NEXT:    movzbl %sil, %eax
; SSE-NEXT:    movq -72(%rsp,%rax,8), %rax
; SSE-NEXT:    retq
;
; AVX2-LABEL: ashr_extract_idx_load_i256_i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovaps (%rdi), %xmm0
; AVX2-NEXT:    movq 16(%rdi), %rax
; AVX2-NEXT:    movq 24(%rdi), %rcx
; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    sarq $63, %rcx
; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    andb $3, %sil
; AVX2-NEXT:    movzbl %sil, %eax
; AVX2-NEXT:    movq -72(%rsp,%rax,8), %rax
; AVX2-NEXT:    retq
;
; AVX512-LABEL: ashr_extract_idx_load_i256_i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps (%rdi), %xmm0
; AVX512-NEXT:    movq 16(%rdi), %rax
; AVX512-NEXT:    movq 24(%rdi), %rcx
; AVX512-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    sarq $63, %rcx
; AVX512-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    andb $3, %sil
; AVX512-NEXT:    movzbl %sil, %eax
; AVX512-NEXT:    movq -72(%rsp,%rax,8), %rax
; AVX512-NEXT:    retq
;
; X86-LABEL: ashr_extract_idx_load_i256_i64:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $96, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl (%eax), %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 4(%eax), %ecx
; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT:    movl 8(%eax), %esi
; X86-NEXT:    movl 12(%eax), %edi
; X86-NEXT:    movl 16(%eax), %ebx
; X86-NEXT:    movl 20(%eax), %edx
; X86-NEXT:    movl 24(%eax), %ecx
; X86-NEXT:    movl 28(%eax), %eax
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
; X86-NEXT:    movzbl 12(%ebp), %ecx
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    sarl $31, %eax
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT:    andl $3, %ecx
; X86-NEXT:    movl 16(%esp,%ecx,8), %eax
; X86-NEXT:    movl 20(%esp,%ecx,8), %edx
; X86-NEXT:    leal -12(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
  %a0 = load i256, ptr %p0
  %m1 = mul i256 %a1, 64
  %b = ashr i256 %a0, %m1
  %r = trunc i256 %b to i64
  ret i64 %r
}