; blob: be6f2a6d0519272342c071dd316993e91433a18b [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI,CHECK-NOBMI-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-BMI2-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX12,CHECK-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX12,CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX512
; Returns 1 when %a is greater than -1 (signed) and -1 otherwise, on i3.
; Expected codegen isolates bit 2 with a $4 mask, converts it to the carry
; flag with negb, materializes 0/-1 with sbbl and then ORs in 1.
define i3 @sign_i3(i3 %a) {
; CHECK-LABEL: sign_i3:
; CHECK: # %bb.0:
; CHECK-NEXT: andb $4, %dil
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: negb %dil
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: orb $1, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%c = icmp sgt i3 %a, -1
%res = select i1 %c, i3 1, i3 -1
ret i3 %res
}
; Returns 1 when %a is greater than -1 (signed) and -1 otherwise, on i4.
; Same expected sequence as the i3 variant, but the mask is $8 (bit 3).
define i4 @sign_i4(i4 %a) {
; CHECK-LABEL: sign_i4:
; CHECK: # %bb.0:
; CHECK-NEXT: andb $8, %dil
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: negb %dil
; CHECK-NEXT: sbbl %eax, %eax
; CHECK-NEXT: orb $1, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%c = icmp sgt i4 %a, -1
%res = select i1 %c, i4 1, i4 -1
ret i4 %res
}
; Returns 1 when %a is greater than -1 (signed) and -1 otherwise, on i8.
; Expected codegen is an arithmetic shift right by 7 followed by OR with 1.
define i8 @sign_i8(i8 %a) {
; CHECK-LABEL: sign_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: sarb $7, %al
; CHECK-NEXT: orb $1, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq
%c = icmp sgt i8 %a, -1
%res = select i1 %c, i8 1, i8 -1
ret i8 %res
}
; Returns 1 when %a is greater than -1 (signed) and -1 otherwise, on i16.
; Expected codegen sign-extends to 32 bits (movswl), shifts right
; arithmetically by 15 and ORs in 1.
define i16 @sign_i16(i16 %a) {
; CHECK-LABEL: sign_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: movswl %di, %eax
; CHECK-NEXT: sarl $15, %eax
; CHECK-NEXT: orl $1, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%c = icmp sgt i16 %a, -1
%res = select i1 %c, i16 1, i16 -1
ret i16 %res
}
; Returns 1 when %a is greater than -1 (signed) and -1 otherwise, on i32.
; Expected codegen is sarl $31 followed by orl $1.
define i32 @sign_i32(i32 %a) {
; CHECK-LABEL: sign_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: sarl $31, %eax
; CHECK-NEXT: orl $1, %eax
; CHECK-NEXT: retq
%c = icmp sgt i32 %a, -1
%res = select i1 %c, i32 1, i32 -1
ret i32 %res
}
; Returns 1 when %a is greater than -1 (signed) and -1 otherwise, on i64.
; Expected codegen is sarq $63 followed by orq $1.
define i64 @sign_i64(i64 %a) {
; CHECK-LABEL: sign_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: sarq $63, %rax
; CHECK-NEXT: orq $1, %rax
; CHECK-NEXT: retq
%c = icmp sgt i64 %a, -1
%res = select i1 %c, i64 1, i64 -1
ret i64 %res
}
; The compare is against 0 instead of -1, so the shift+or sequence of
; @sign_i64 is not expected here. Expected codegen is test/setg followed by
; an lea that computes 2*flag - 1.
define i64 @not_sign_i64(i64 %a) {
; CHECK-LABEL: not_sign_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: setg %al
; CHECK-NEXT: leaq -1(%rax,%rax), %rax
; CHECK-NEXT: retq
%c = icmp sgt i64 %a, 0
%res = select i1 %c, i64 1, i64 -1
ret i64 %res
}
; Same compare as @sign_i64 but the select arms are 0 and -1.
; Expected codegen is just sarq $63, with no trailing OR.
define i64 @not_sign_i64_sgt_neg1_zero_or_neg1(i64 %a) {
; CHECK-LABEL: not_sign_i64_sgt_neg1_zero_or_neg1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: sarq $63, %rax
; CHECK-NEXT: retq
%c = icmp sgt i64 %a, -1
%res = select i1 %c, i64 0, i64 -1
ret i64 %res
}
; Same compare as @sign_i64 but the select arms are 1 and 0.
; Expected codegen inverts the value (notq) and extracts the top bit with a
; logical shift right by 63.
define i64 @not_sign_i64_sgt_neg1_one_or_zero(i64 %a) {
; CHECK-LABEL: not_sign_i64_sgt_neg1_one_or_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: notq %rax
; CHECK-NEXT: shrq $63, %rax
; CHECK-NEXT: retq
%c = icmp sgt i64 %a, -1
%res = select i1 %c, i64 1, i64 0
ret i64 %res
}
; Unsigned compare against -1 (all ones): no i64 value is unsigned-greater
; than that, so the select always takes the false arm. Expected codegen is a
; single constant load of -1.
define i64 @not_sign_i64_ugt_neg1(i64 %a) {
; CHECK-LABEL: not_sign_i64_ugt_neg1:
; CHECK: # %bb.0:
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: retq
%c = icmp ugt i64 %a, -1
%res = select i1 %c, i64 1, i64 -1
ret i64 %res
}
; Per-lane 1 / -1 select on a <7 x i8> vector (7 lanes, not a power of two).
; In the expected output the SSE2 runs assemble the input from GPR and stack
; arguments with shifts, ORs and pinsrw, while the AVX runs use vpinsrb.
; Every run then compares zero against the input (pcmpgtb / vpcmpgtb), ORs in
; a constant-pool value and stores the result at offsets 0, 4 and 6 from the
; pointer received in %rdi, which is also returned in %rax.
define <7 x i8> @sign_7xi8(<7 x i8> %a) {
; CHECK-NOBMI-LABEL: sign_7xi8:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: movq %rdi, %rax
; CHECK-NOBMI-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-NOBMI-NEXT: shll $8, %edi
; CHECK-NOBMI-NEXT: movzbl %r9b, %r9d
; CHECK-NOBMI-NEXT: orl %edi, %r9d
; CHECK-NOBMI-NEXT: movzbl %sil, %esi
; CHECK-NOBMI-NEXT: movzbl %dl, %edx
; CHECK-NOBMI-NEXT: shll $8, %edx
; CHECK-NOBMI-NEXT: orl %esi, %edx
; CHECK-NOBMI-NEXT: movzbl %cl, %ecx
; CHECK-NOBMI-NEXT: shll $16, %ecx
; CHECK-NOBMI-NEXT: orl %edx, %ecx
; CHECK-NOBMI-NEXT: shll $24, %r8d
; CHECK-NOBMI-NEXT: orl %ecx, %r8d
; CHECK-NOBMI-NEXT: movd %r8d, %xmm0
; CHECK-NOBMI-NEXT: pinsrw $2, %r9d, %xmm0
; CHECK-NOBMI-NEXT: pinsrw $3, {{[0-9]+}}(%rsp), %xmm0
; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1
; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1
; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NOBMI-NEXT: movd %xmm1, (%rax)
; CHECK-NOBMI-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NOBMI-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; CHECK-NOBMI-NEXT: movb %cl, 6(%rax)
; CHECK-NOBMI-NEXT: pextrw $2, %xmm1, %ecx
; CHECK-NOBMI-NEXT: movw %cx, 4(%rax)
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: sign_7xi8:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: movq %rdi, %rax
; CHECK-BMI2-SSE2-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-BMI2-SSE2-NEXT: shll $8, %edi
; CHECK-BMI2-SSE2-NEXT: movzbl %r9b, %r9d
; CHECK-BMI2-SSE2-NEXT: orl %edi, %r9d
; CHECK-BMI2-SSE2-NEXT: movzbl %sil, %esi
; CHECK-BMI2-SSE2-NEXT: movzbl %dl, %edx
; CHECK-BMI2-SSE2-NEXT: shll $8, %edx
; CHECK-BMI2-SSE2-NEXT: orl %esi, %edx
; CHECK-BMI2-SSE2-NEXT: movzbl %cl, %ecx
; CHECK-BMI2-SSE2-NEXT: shll $16, %ecx
; CHECK-BMI2-SSE2-NEXT: orl %edx, %ecx
; CHECK-BMI2-SSE2-NEXT: shll $24, %r8d
; CHECK-BMI2-SSE2-NEXT: orl %ecx, %r8d
; CHECK-BMI2-SSE2-NEXT: movd %r8d, %xmm0
; CHECK-BMI2-SSE2-NEXT: pinsrw $2, %r9d, %xmm0
; CHECK-BMI2-SSE2-NEXT: pinsrw $3, {{[0-9]+}}(%rsp), %xmm0
; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-BMI2-SSE2-NEXT: movd %xmm1, (%rax)
; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-BMI2-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; CHECK-BMI2-SSE2-NEXT: movb %cl, 6(%rax)
; CHECK-BMI2-SSE2-NEXT: pextrw $2, %xmm1, %ecx
; CHECK-BMI2-SSE2-NEXT: movw %cx, 4(%rax)
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX12-LABEL: sign_7xi8:
; CHECK-AVX12: # %bb.0:
; CHECK-AVX12-NEXT: movq %rdi, %rax
; CHECK-AVX12-NEXT: vmovd %esi, %xmm0
; CHECK-AVX12-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX12-NEXT: vpextrb $6, %xmm0, 6(%rdi)
; CHECK-AVX12-NEXT: vpextrw $2, %xmm0, 4(%rdi)
; CHECK-AVX12-NEXT: vmovd %xmm0, (%rdi)
; CHECK-AVX12-NEXT: retq
;
; CHECK-AVX512-LABEL: sign_7xi8:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: movq %rdi, %rax
; CHECK-AVX512-NEXT: vmovd %esi, %xmm0
; CHECK-AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpextrb $6, %xmm0, 6(%rdi)
; CHECK-AVX512-NEXT: vpextrw $2, %xmm0, 4(%rdi)
; CHECK-AVX512-NEXT: vmovd %xmm0, (%rdi)
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <7 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%res = select <7 x i1> %c, <7 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <7 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
ret <7 x i8> %res
}
; Per-lane 1 / -1 select on <8 x i8>. Expected codegen compares a zeroed
; register against the input (pcmpgtb / vpcmpgtb) and ORs in a constant-pool
; value; the AVX512 run uses an embedded {1to4} broadcast for that constant.
define <8 x i8> @sign_8xi8(<8 x i8> %a) {
; CHECK-NOBMI-LABEL: sign_8xi8:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1
; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1
; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: sign_8xi8:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX12-LABEL: sign_8xi8:
; CHECK-AVX12: # %bb.0:
; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX12-NEXT: retq
;
; CHECK-AVX512-LABEL: sign_8xi8:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%res = select <8 x i1> %c, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
ret <8 x i8> %res
}
; Per-lane 1 / -1 select on <16 x i8>. Expected codegen is the same
; zero-compare (pcmpgtb / vpcmpgtb) plus OR-with-constant sequence as the
; <8 x i8> variant.
define <16 x i8> @sign_16xi8(<16 x i8> %a) {
; CHECK-NOBMI-LABEL: sign_16xi8:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1
; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1
; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: sign_16xi8:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1
; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX12-LABEL: sign_16xi8:
; CHECK-AVX12: # %bb.0:
; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX12-NEXT: retq
;
; CHECK-AVX512-LABEL: sign_16xi8:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
%res = select <16 x i1> %c, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
ret <16 x i8> %res
}
; Per-lane 1 / -1 select on <3 x i32>. Expected codegen is an arithmetic
; shift right by 31 (psrad / vpsrad) followed by OR with 1. The AVX2 run
; materializes the constant with vpbroadcastd; the AVX512 run folds it as an
; embedded {1to4} broadcast.
define <3 x i32> @sign_3xi32(<3 x i32> %a) {
; CHECK-NOBMI-LABEL: sign_3xi32:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: psrad $31, %xmm0
; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: sign_3xi32:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0
; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX1-LABEL: sign_3xi32:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: sign_3xi32:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: sign_3xi32:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1>
%res = select <3 x i1> %c, <3 x i32> <i32 1, i32 1, i32 1>, <3 x i32> <i32 -1, i32 -1, i32 -1>
ret <3 x i32> %res
}
; Per-lane 1 / -1 select on <4 x i32>. Expected codegen is an arithmetic
; shift right by 31 (psrad / vpsrad) followed by OR with a splat of 1.
define <4 x i32> @sign_4xi32(<4 x i32> %a) {
; CHECK-NOBMI-LABEL: sign_4xi32:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: psrad $31, %xmm0
; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: sign_4xi32:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0
; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX1-LABEL: sign_4xi32:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: sign_4xi32:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: sign_4xi32:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
%res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %res
}
; Same select as @sign_4xi32, but the compare result %c has a second use as
; the argument of @use_4xi1. The expected output therefore contains both the
; shift+or computation of the result (spilled to the stack across the call
; and reloaded afterwards) and an explicit pcmpgtd / vpcmpgtd against all-ones
; that produces the call argument.
define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) {
; CHECK-NOBMI-LABEL: sign_4xi32_multi_use:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: subq $24, %rsp
; CHECK-NOBMI-NEXT: .cfi_def_cfa_offset 32
; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm2
; CHECK-NOBMI-NEXT: psrad $31, %xmm2
; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-NOBMI-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill
; CHECK-NOBMI-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-NOBMI-NEXT: callq use_4xi1@PLT
; CHECK-NOBMI-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NOBMI-NEXT: addq $24, %rsp
; CHECK-NOBMI-NEXT: .cfi_def_cfa_offset 8
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: sign_4xi32_multi_use:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: subq $24, %rsp
; CHECK-BMI2-SSE2-NEXT: .cfi_def_cfa_offset 32
; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm2
; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm2
; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; CHECK-BMI2-SSE2-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill
; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-BMI2-SSE2-NEXT: callq use_4xi1@PLT
; CHECK-BMI2-SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-BMI2-SSE2-NEXT: addq $24, %rsp
; CHECK-BMI2-SSE2-NEXT: .cfi_def_cfa_offset 8
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX1-LABEL: sign_4xi32_multi_use:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: subq $24, %rsp
; CHECK-AVX1-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; CHECK-AVX1-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill
; CHECK-AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: callq use_4xi1@PLT
; CHECK-AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX1-NEXT: addq $24, %rsp
; CHECK-AVX1-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: sign_4xi32_multi_use:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: subq $24, %rsp
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1]
; CHECK-AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2
; CHECK-AVX2-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill
; CHECK-AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: callq use_4xi1@PLT
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX2-NEXT: addq $24, %rsp
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: sign_4xi32_multi_use:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: subq $24, %rsp
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32
; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm2
; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2
; CHECK-AVX512-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill
; CHECK-AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; CHECK-AVX512-NEXT: callq use_4xi1@PLT
; CHECK-AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512-NEXT: addq $24, %rsp
; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
%res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
call void @use_4xi1(<4 x i1> %c)
ret <4 x i32> %res
}
; Second icmp operand breaks sign pattern.
; The compare constant is <1, -1, -1, -1>, i.e. lane 0 is not -1. The expected
; output keeps an explicit pcmpgtd / vpcmpgtd against a constant-pool operand
; instead of the shift-by-31 used by @sign_4xi32.
define <4 x i32> @not_sign_4xi32(<4 x i32> %a) {
; CHECK-NOBMI-LABEL: not_sign_4xi32:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX1-LABEL: not_sign_4xi32:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: not_sign_4xi32:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512-LABEL: not_sign_4xi32:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k1} = [1,1,1,1]
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a, <i32 1, i32 -1, i32 -1, i32 -1>
%res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %res
}
; First select operand breaks sign pattern.
; The true operand is <1, 1, -1, 1>, so lane 2 is -1 in both select operands.
; The expected output is still a shift right by 31 followed by OR with a
; constant-pool operand, and all AVX runs share one set of assertions.
define <4 x i32> @not_sign_4xi32_select_true_breaks_pattern(<4 x i32> %a) {
; CHECK-NOBMI-LABEL: not_sign_4xi32_select_true_breaks_pattern:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: psrad $31, %xmm0
; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32_select_true_breaks_pattern:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0
; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX-LABEL: not_sign_4xi32_select_true_breaks_pattern:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: vpsrad $31, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
%c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
%res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 -1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %res
}
; Second select operand breaks sign pattern.
; The false operand is <-1, -1, -1, 1> (lane 3 is 1 rather than -1). The
; expected output no longer uses the shift+or form: the SSE2 runs use
; pcmpgtd / pandn / psubd, the AVX1/AVX2 runs use vblendvps keyed on the input
; register, and the AVX512 run uses a masked broadcast over a preloaded
; constant.
define <4 x i32> @not_sign_4xi32_select_false_breaks_pattern(<4 x i32> %a) {
; CHECK-NOBMI-LABEL: not_sign_4xi32_select_false_breaks_pattern:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NOBMI-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
; CHECK-NOBMI-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NOBMI-NEXT: psubd %xmm0, %xmm1
; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32_select_false_breaks_pattern:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-BMI2-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-BMI2-SSE2-NEXT: psubd %xmm0, %xmm1
; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX12-LABEL: not_sign_4xi32_select_false_breaks_pattern:
; CHECK-AVX12: # %bb.0:
; CHECK-AVX12-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1]
; CHECK-AVX12-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; CHECK-AVX12-NEXT: retq
;
; CHECK-AVX512-LABEL: not_sign_4xi32_select_false_breaks_pattern:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1
; CHECK-AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4294967295,4294967295,4294967295,1]
; CHECK-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k1} = [1,1,1,1]
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
%res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 1>
ret <4 x i32> %res
}
; i65 is not legal.
; Same per-lane 1 / -1 select on <4 x i65>. In the expected output every
; configuration writes the result through the pointer received in %rdi (also
; returned in %rax), with 64-bit stores at offsets 0, 8, 16 and 24 and a final
; byte store at offset 32 whose value is masked with $15. The AVX512 run
; builds the select mask in k-registers and ends with vzeroupper.
define <4 x i65> @sign_4xi65(<4 x i65> %a) {
; CHECK-NOBMI-LABEL: sign_4xi65:
; CHECK-NOBMI: # %bb.0:
; CHECK-NOBMI-NEXT: movq %rdi, %rax
; CHECK-NOBMI-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-NOBMI-NEXT: andl $1, %ecx
; CHECK-NOBMI-NEXT: movq %rcx, %rsi
; CHECK-NOBMI-NEXT: negq %rsi
; CHECK-NOBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK-NOBMI-NEXT: andl $1, %edi
; CHECK-NOBMI-NEXT: movq %rdi, %r10
; CHECK-NOBMI-NEXT: negq %r10
; CHECK-NOBMI-NEXT: andl $1, %edx
; CHECK-NOBMI-NEXT: negq %rdx
; CHECK-NOBMI-NEXT: andl $1, %r8d
; CHECK-NOBMI-NEXT: negq %r8
; CHECK-NOBMI-NEXT: leaq (%r8,%r8), %r9
; CHECK-NOBMI-NEXT: movq %rdx, %xmm0
; CHECK-NOBMI-NEXT: orq $1, %rdx
; CHECK-NOBMI-NEXT: movq %rdx, (%rax)
; CHECK-NOBMI-NEXT: andl $15, %r10d
; CHECK-NOBMI-NEXT: movb %r10b, 32(%rax)
; CHECK-NOBMI-NEXT: movl %esi, %r10d
; CHECK-NOBMI-NEXT: andl $1, %r10d
; CHECK-NOBMI-NEXT: shldq $2, %rsi, %r10
; CHECK-NOBMI-NEXT: shll $3, %edi
; CHECK-NOBMI-NEXT: subq %rdi, %r10
; CHECK-NOBMI-NEXT: orq $8, %r10
; CHECK-NOBMI-NEXT: movq %r10, 24(%rax)
; CHECK-NOBMI-NEXT: movl %r8d, %esi
; CHECK-NOBMI-NEXT: andl $1, %esi
; CHECK-NOBMI-NEXT: shldq $1, %r8, %rsi
; CHECK-NOBMI-NEXT: shll $2, %ecx
; CHECK-NOBMI-NEXT: subq %rcx, %rsi
; CHECK-NOBMI-NEXT: orq $4, %rsi
; CHECK-NOBMI-NEXT: movq %rsi, 16(%rax)
; CHECK-NOBMI-NEXT: movq %rdx, %xmm1
; CHECK-NOBMI-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; CHECK-NOBMI-NEXT: movq %xmm0, %rcx
; CHECK-NOBMI-NEXT: orq $2, %r9
; CHECK-NOBMI-NEXT: subq %rcx, %r9
; CHECK-NOBMI-NEXT: movq %r9, 8(%rax)
; CHECK-NOBMI-NEXT: retq
;
; CHECK-BMI2-SSE2-LABEL: sign_4xi65:
; CHECK-BMI2-SSE2: # %bb.0:
; CHECK-BMI2-SSE2-NEXT: movq %rdi, %rax
; CHECK-BMI2-SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-BMI2-SSE2-NEXT: andl $1, %ecx
; CHECK-BMI2-SSE2-NEXT: movq %rcx, %rsi
; CHECK-BMI2-SSE2-NEXT: negq %rsi
; CHECK-BMI2-SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK-BMI2-SSE2-NEXT: andl $1, %edi
; CHECK-BMI2-SSE2-NEXT: movq %rdi, %r10
; CHECK-BMI2-SSE2-NEXT: negq %r10
; CHECK-BMI2-SSE2-NEXT: andl $1, %edx
; CHECK-BMI2-SSE2-NEXT: negq %rdx
; CHECK-BMI2-SSE2-NEXT: andl $1, %r8d
; CHECK-BMI2-SSE2-NEXT: negq %r8
; CHECK-BMI2-SSE2-NEXT: leaq (%r8,%r8), %r9
; CHECK-BMI2-SSE2-NEXT: movq %rdx, %xmm0
; CHECK-BMI2-SSE2-NEXT: orq $1, %rdx
; CHECK-BMI2-SSE2-NEXT: movq %rdx, (%rax)
; CHECK-BMI2-SSE2-NEXT: andl $15, %r10d
; CHECK-BMI2-SSE2-NEXT: movb %r10b, 32(%rax)
; CHECK-BMI2-SSE2-NEXT: movl %esi, %r10d
; CHECK-BMI2-SSE2-NEXT: andl $1, %r10d
; CHECK-BMI2-SSE2-NEXT: shldq $2, %rsi, %r10
; CHECK-BMI2-SSE2-NEXT: shll $3, %edi
; CHECK-BMI2-SSE2-NEXT: subq %rdi, %r10
; CHECK-BMI2-SSE2-NEXT: orq $8, %r10
; CHECK-BMI2-SSE2-NEXT: movq %r10, 24(%rax)
; CHECK-BMI2-SSE2-NEXT: movl %r8d, %esi
; CHECK-BMI2-SSE2-NEXT: andl $1, %esi
; CHECK-BMI2-SSE2-NEXT: shldq $1, %r8, %rsi
; CHECK-BMI2-SSE2-NEXT: shll $2, %ecx
; CHECK-BMI2-SSE2-NEXT: subq %rcx, %rsi
; CHECK-BMI2-SSE2-NEXT: orq $4, %rsi
; CHECK-BMI2-SSE2-NEXT: movq %rsi, 16(%rax)
; CHECK-BMI2-SSE2-NEXT: movq %rdx, %xmm1
; CHECK-BMI2-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-BMI2-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
; CHECK-BMI2-SSE2-NEXT: movq %xmm0, %rcx
; CHECK-BMI2-SSE2-NEXT: orq $2, %r9
; CHECK-BMI2-SSE2-NEXT: subq %rcx, %r9
; CHECK-BMI2-SSE2-NEXT: movq %r9, 8(%rax)
; CHECK-BMI2-SSE2-NEXT: retq
;
; CHECK-AVX12-LABEL: sign_4xi65:
; CHECK-AVX12: # %bb.0:
; CHECK-AVX12-NEXT: movq %rdi, %rax
; CHECK-AVX12-NEXT: andl $1, %r8d
; CHECK-AVX12-NEXT: movq %r8, %rsi
; CHECK-AVX12-NEXT: negq %rsi
; CHECK-AVX12-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX12-NEXT: andl $1, %ecx
; CHECK-AVX12-NEXT: movq %rcx, %r9
; CHECK-AVX12-NEXT: negq %r9
; CHECK-AVX12-NEXT: movq {{[0-9]+}}(%rsp), %rdi
; CHECK-AVX12-NEXT: andl $1, %edi
; CHECK-AVX12-NEXT: movq %rdi, %r10
; CHECK-AVX12-NEXT: negq %r10
; CHECK-AVX12-NEXT: andl $1, %edx
; CHECK-AVX12-NEXT: movq %rdx, %r11
; CHECK-AVX12-NEXT: negq %r11
; CHECK-AVX12-NEXT: orq $1, %r11
; CHECK-AVX12-NEXT: movq %r11, (%rax)
; CHECK-AVX12-NEXT: andl $15, %r10d
; CHECK-AVX12-NEXT: movb %r10b, 32(%rax)
; CHECK-AVX12-NEXT: addl %r8d, %r8d
; CHECK-AVX12-NEXT: subq %r8, %rdx
; CHECK-AVX12-NEXT: orq $2, %rdx
; CHECK-AVX12-NEXT: movq %rdx, 8(%rax)
; CHECK-AVX12-NEXT: movl %r9d, %edx
; CHECK-AVX12-NEXT: andl $1, %edx
; CHECK-AVX12-NEXT: shldq $2, %r9, %rdx
; CHECK-AVX12-NEXT: shll $3, %edi
; CHECK-AVX12-NEXT: subq %rdi, %rdx
; CHECK-AVX12-NEXT: orq $8, %rdx
; CHECK-AVX12-NEXT: movq %rdx, 24(%rax)
; CHECK-AVX12-NEXT: movl %esi, %edx
; CHECK-AVX12-NEXT: andl $1, %edx
; CHECK-AVX12-NEXT: shldq $1, %rsi, %rdx
; CHECK-AVX12-NEXT: shll $2, %ecx
; CHECK-AVX12-NEXT: subq %rcx, %rdx
; CHECK-AVX12-NEXT: orq $4, %rdx
; CHECK-AVX12-NEXT: movq %rdx, 16(%rax)
; CHECK-AVX12-NEXT: retq
;
; CHECK-AVX512-LABEL: sign_4xi65:
; CHECK-AVX512: # %bb.0:
; CHECK-AVX512-NEXT: movq %rdi, %rax
; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; CHECK-AVX512-NEXT: andl $1, %r8d
; CHECK-AVX512-NEXT: andl $1, %edx
; CHECK-AVX512-NEXT: andl $1, %esi
; CHECK-AVX512-NEXT: andl $1, %ecx
; CHECK-AVX512-NEXT: xorl %edi, %edi
; CHECK-AVX512-NEXT: cmpq $1, %rcx
; CHECK-AVX512-NEXT: movl $0, %ecx
; CHECK-AVX512-NEXT: sbbl %ecx, %ecx
; CHECK-AVX512-NEXT: kmovw %ecx, %k0
; CHECK-AVX512-NEXT: cmpq $1, %rsi
; CHECK-AVX512-NEXT: movl $0, %ecx
; CHECK-AVX512-NEXT: sbbl %ecx, %ecx
; CHECK-AVX512-NEXT: kmovw %ecx, %k1
; CHECK-AVX512-NEXT: cmpq $1, %rdx
; CHECK-AVX512-NEXT: movl $0, %ecx
; CHECK-AVX512-NEXT: sbbl %ecx, %ecx
; CHECK-AVX512-NEXT: kmovw %ecx, %k2
; CHECK-AVX512-NEXT: cmpq $1, %r8
; CHECK-AVX512-NEXT: sbbl %edi, %edi
; CHECK-AVX512-NEXT: kmovw %edi, %k3
; CHECK-AVX512-NEXT: kshiftlw $4, %k3, %k3
; CHECK-AVX512-NEXT: kshiftlw $12, %k2, %k2
; CHECK-AVX512-NEXT: kshiftrw $12, %k2, %k2
; CHECK-AVX512-NEXT: korw %k3, %k2, %k2
; CHECK-AVX512-NEXT: kshiftlw $4, %k1, %k1
; CHECK-AVX512-NEXT: kshiftlw $12, %k0, %k0
; CHECK-AVX512-NEXT: kshiftrw $12, %k0, %k0
; CHECK-AVX512-NEXT: korw %k1, %k0, %k0
; CHECK-AVX512-NEXT: kunpckbw %k2, %k0, %k1
; CHECK-AVX512-NEXT: vpmovsxbq {{.*#+}} zmm0 = [18446744073709551615,1,18446744073709551615,1,18446744073709551615,1,18446744073709551615,1]
; CHECK-AVX512-NEXT: vmovdqa32 {{.*#+}} zmm0 {%k1} = [1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0]
; CHECK-AVX512-NEXT: vmovq %xmm0, (%rax)
; CHECK-AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; CHECK-AVX512-NEXT: vmovq %xmm1, %rcx
; CHECK-AVX512-NEXT: vpextrq $1, %xmm1, %rdx
; CHECK-AVX512-NEXT: shldq $2, %rcx, %rdx
; CHECK-AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; CHECK-AVX512-NEXT: vmovq %xmm1, %rsi
; CHECK-AVX512-NEXT: leaq (%rdx,%rsi,8), %rdx
; CHECK-AVX512-NEXT: movq %rdx, 24(%rax)
; CHECK-AVX512-NEXT: vpextrq $1, %xmm0, %rdx
; CHECK-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
; CHECK-AVX512-NEXT: vmovq %xmm0, %rdi
; CHECK-AVX512-NEXT: leaq (%rdx,%rdi,2), %rdx
; CHECK-AVX512-NEXT: movq %rdx, 8(%rax)
; CHECK-AVX512-NEXT: vpextrq $1, %xmm0, %rdx
; CHECK-AVX512-NEXT: shldq $1, %rdi, %rdx
; CHECK-AVX512-NEXT: leaq (%rdx,%rcx,4), %rcx
; CHECK-AVX512-NEXT: movq %rcx, 16(%rax)
; CHECK-AVX512-NEXT: vpextrq $1, %xmm1, %rcx
; CHECK-AVX512-NEXT: shldq $3, %rsi, %rcx
; CHECK-AVX512-NEXT: andl $15, %ecx
; CHECK-AVX512-NEXT: movb %cl, 32(%rax)
; CHECK-AVX512-NEXT: vzeroupper
; CHECK-AVX512-NEXT: retq
%c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1>
%res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65 > <i65 -1, i65 -1, i65 -1, i65 -1>
ret <4 x i65> %res
}
; Computes -(x | 1), compares it signed-greater-than against %y and
; zero-extends the i1 result. Expected codegen keeps the orl/negl and uses
; cmpl + setg.
define i32 @or_neg(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg:
; CHECK: # %bb.0:
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setg %al
; CHECK-NEXT: retq
%3 = or i32 %x, 1
%4 = sub i32 0, %3
%5 = icmp sgt i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Computes -(x | 1), compares it unsigned-greater-than against %y and
; zero-extends the i1 result. Expected codegen uses cmpl + seta.
define i32 @or_neg_ugt(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_ugt:
; CHECK: # %bb.0:
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: seta %al
; CHECK-NEXT: retq
%3 = or i32 %x, 1
%4 = sub i32 0, %3
%5 = icmp ugt i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Negative test
; Negates %x directly (there is no `or` with 1 before the negation) and
; compares signed-greater-than against %y. Expected codegen is negl, cmpl,
; setg.
define i32 @or_neg_no_smin(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_no_smin:
; CHECK: # %bb.0:
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setg %al
; CHECK-NEXT: retq
%4 = sub i32 0, %x
%5 = icmp sgt i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Negative test
; Negates %x directly (there is no `or` with 1 before the negation) and
; compares unsigned-less-than against %y. Expected codegen is negl, cmpl,
; setb.
define i32 @or_neg_ult_no_zero(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_ult_no_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
%4 = sub i32 0, %x
%5 = icmp ult i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Negates smax(%x, 0) and compares signed-greater-than against %y. Expected
; codegen selects between %edi and zero with test + cmovgl, then negl, cmpl,
; setg.
define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_no_smin_but_zero:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovgl %edi, %ecx
; CHECK-NEXT: negl %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %ecx
; CHECK-NEXT: setg %al
; CHECK-NEXT: retq
%3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
%4 = sub i32 0, %3
%5 = icmp sgt i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Negates umin(%x, 9) and compares unsigned-greater-than against %y. Expected
; codegen clamps with cmpl $9 + cmovbl, then negl, cmpl, seta.
define i32 @or_neg_slt_zero_but_no_smin(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_slt_zero_but_no_smin:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $9, %edi
; CHECK-NEXT: movl $9, %ecx
; CHECK-NEXT: cmovbl %edi, %ecx
; CHECK-NEXT: negl %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %ecx
; CHECK-NEXT: seta %al
; CHECK-NEXT: retq
%3 = call i32 @llvm.umin.i32(i32 %x, i32 9)
%4 = sub i32 0, %3
%5 = icmp ugt i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Computes -(x | 1), compares it signed-greater-or-equal against %y and
; zero-extends the i1 result. Expected codegen uses cmpl + setge.
define i32 @or_neg_sge(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_sge:
; CHECK: # %bb.0:
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setge %al
; CHECK-NEXT: retq
%3 = or i32 %x, 1
%4 = sub i32 0, %3
%5 = icmp sge i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Computes -(x | 1), compares it signed-less-than against %y and
; zero-extends the i1 result. Expected codegen uses cmpl + setl.
define i32 @or_neg_slt(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_slt:
; CHECK: # %bb.0:
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setl %al
; CHECK-NEXT: retq
%3 = or i32 %x, 1
%4 = sub i32 0, %3
%5 = icmp slt i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Computes -(x | 1), compares it signed-less-or-equal against %y and
; zero-extends the i1 result. Expected codegen uses cmpl + setle.
define i32 @or_neg_sle(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_sle:
; CHECK: # %bb.0:
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setle %al
; CHECK-NEXT: retq
%3 = or i32 %x, 1
%4 = sub i32 0, %3
%5 = icmp sle i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Computes -(x | 1), compares it unsigned-less-than against %y and
; zero-extends the i1 result. The predicate is `ult`, matching the function
; name, so the expected condition code is setb.
; NOTE(review): the assertions below were adjusted by hand to follow the
; predicate; re-run utils/update_llc_test_checks.py to confirm them.
define i32 @or_neg_ult(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_ult:
; CHECK: # %bb.0:
; CHECK-NEXT: orl $1, %edi
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
%3 = or i32 %x, 1
%4 = sub i32 0, %3
%5 = icmp ult i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Negates %x directly (there is no `or` with 1 before the negation) and
; compares signed-greater-or-equal against %y. Expected codegen is negl, cmpl,
; setge.
define i32 @or_neg_no_smin_sge(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_no_smin_sge:
; CHECK: # %bb.0:
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setge %al
; CHECK-NEXT: retq
%4 = sub i32 0, %x
%5 = icmp sge i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Negative test
; Negates %x directly and compares unsigned-less-than against %y. Expected
; codegen is negl, cmpl, setb.
; NOTE(review): the IR body is identical to @or_neg_ult_no_zero; confirm
; whether a different predicate or operand was intended for this variant.
define i32 @or_neg_ult_no_zero_alt(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_ult_no_zero_alt:
; CHECK: # %bb.0:
; CHECK-NEXT: negl %edi
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %edi
; CHECK-NEXT: setb %al
; CHECK-NEXT: retq
%4 = sub i32 0, %x
%5 = icmp ult i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Negates smax(%x, 0) and compares signed-less-or-equal against %y. Expected
; codegen selects between %edi and zero with test + cmovgl, then negl, cmpl,
; setle.
define i32 @or_neg_no_smin_but_zero_sle(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_no_smin_but_zero_sle:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: cmovgl %edi, %ecx
; CHECK-NEXT: negl %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %ecx
; CHECK-NEXT: setle %al
; CHECK-NEXT: retq
%3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
%4 = sub i32 0, %3
%5 = icmp sle i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Negates umin(%x, 9) and compares unsigned-greater-or-equal against %y.
; Expected codegen clamps with cmpl $9 + cmovbl, then negl, cmpl, setae.
define i32 @or_neg_slt_zero_but_no_smin_uge(i32 %x, i32 %y) {
; CHECK-LABEL: or_neg_slt_zero_but_no_smin_uge:
; CHECK: # %bb.0:
; CHECK-NEXT: cmpl $9, %edi
; CHECK-NEXT: movl $9, %ecx
; CHECK-NEXT: cmovbl %edi, %ecx
; CHECK-NEXT: negl %ecx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %esi, %ecx
; CHECK-NEXT: setae %al
; CHECK-NEXT: retq
%3 = call i32 @llvm.umin.i32(i32 %x, i32 9)
%4 = sub i32 0, %3
%5 = icmp uge i32 %4, %y
%6 = zext i1 %5 to i32
ret i32 %6
}
; Declarations for the intrinsics and the external callee referenced by the
; tests in this file.
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)
; Called by @or_neg_slt_zero_but_no_smin and @or_neg_slt_zero_but_no_smin_uge.
declare i32 @llvm.umin.i32(i32, i32)
declare void @use_4xi1(<4 x i1>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-BMI2: {{.*}}
; CHECK-NOBMI-SSE2: {{.*}}