| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI,CHECK-NOBMI-SSE2 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-BMI2-SSE2 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX12,CHECK-AVX1 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX12,CHECK-AVX2 |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX512 |
| |
; Sign pattern on a 3-bit integer: select (icmp sgt %a, -1), 1, -1.
; The checks expect bit 2 (the i3 sign bit) to be isolated with andb $4,
; turned into 0 / -1 with negb + sbbl, and then ORed with 1.
| define i3 @sign_i3(i3 %a) { |
| ; CHECK-LABEL: sign_i3: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andb $4, %dil |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: negb %dil |
| ; CHECK-NEXT: sbbl %eax, %eax |
| ; CHECK-NEXT: orb $1, %al |
| ; CHECK-NEXT: # kill: def $al killed $al killed $eax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i3 %a, -1 |
| %res = select i1 %c, i3 1, i3 -1 |
| ret i3 %res |
| } |
| |
; Sign pattern on a 4-bit integer: select (icmp sgt %a, -1), 1, -1.
; Same expected shape as the i3 case, but the mask is andb $8 (bit 3 is the
; i4 sign bit) before the negb + sbbl + orb $1 sequence.
| define i4 @sign_i4(i4 %a) { |
| ; CHECK-LABEL: sign_i4: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: andb $8, %dil |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: negb %dil |
| ; CHECK-NEXT: sbbl %eax, %eax |
| ; CHECK-NEXT: orb $1, %al |
| ; CHECK-NEXT: # kill: def $al killed $al killed $eax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i4 %a, -1 |
| %res = select i1 %c, i4 1, i4 -1 |
| ret i4 %res |
| } |
| |
; Sign pattern on i8: select (icmp sgt %a, -1), 1, -1.
; The checks expect an arithmetic shift by 7 (sarb $7) to smear the sign bit,
; followed by orb $1.
| define i8 @sign_i8(i8 %a) { |
| ; CHECK-LABEL: sign_i8: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: sarb $7, %al |
| ; CHECK-NEXT: orb $1, %al |
| ; CHECK-NEXT: # kill: def $al killed $al killed $eax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i8 %a, -1 |
| %res = select i1 %c, i8 1, i8 -1 |
| ret i8 %res |
| } |
| |
; Sign pattern on i16: select (icmp sgt %a, -1), 1, -1.
; The checks expect the argument to be sign-extended to 32 bits (movswl) and
; the shift/or to be done on the 32-bit register (sarl $15, orl $1).
| define i16 @sign_i16(i16 %a) { |
| ; CHECK-LABEL: sign_i16: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movswl %di, %eax |
| ; CHECK-NEXT: sarl $15, %eax |
| ; CHECK-NEXT: orl $1, %eax |
| ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i16 %a, -1 |
| %res = select i1 %c, i16 1, i16 -1 |
| ret i16 %res |
| } |
| |
; Sign pattern on i32: select (icmp sgt %a, -1), 1, -1.
; Expected lowering is sarl $31 (0 for non-negative, -1 for negative inputs)
; followed by orl $1.
| define i32 @sign_i32(i32 %a) { |
| ; CHECK-LABEL: sign_i32: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movl %edi, %eax |
| ; CHECK-NEXT: sarl $31, %eax |
| ; CHECK-NEXT: orl $1, %eax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i32 %a, -1 |
| %res = select i1 %c, i32 1, i32 -1 |
| ret i32 %res |
| } |
| |
; Sign pattern on i64: select (icmp sgt %a, -1), 1, -1.
; Expected lowering is sarq $63 followed by orq $1.
| define i64 @sign_i64(i64 %a) { |
| ; CHECK-LABEL: sign_i64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rdi, %rax |
| ; CHECK-NEXT: sarq $63, %rax |
| ; CHECK-NEXT: orq $1, %rax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i64 %a, -1 |
| %res = select i1 %c, i64 1, i64 -1 |
| ret i64 %res |
| } |
| |
| |
; Not the sign pattern: the compare is against 0 rather than -1, so an input
; of 0 selects -1. The checks expect testq/setg and leaq -1(%rax,%rax)
; (2 * cond - 1) instead of the sarq/orq sequence.
| define i64 @not_sign_i64(i64 %a) { |
| ; CHECK-LABEL: not_sign_i64: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: testq %rdi, %rdi |
| ; CHECK-NEXT: setg %al |
| ; CHECK-NEXT: leaq -1(%rax,%rax), %rax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i64 %a, 0 |
| %res = select i1 %c, i64 1, i64 -1 |
| ret i64 %res |
| } |
| |
; Same compare as the sign pattern (sgt -1), but the select arms are 0 / -1.
; The checks expect only the arithmetic shift (sarq $63) with no trailing OR.
| define i64 @not_sign_i64_sgt_neg1_zero_or_neg1(i64 %a) { |
| ; CHECK-LABEL: not_sign_i64_sgt_neg1_zero_or_neg1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rdi, %rax |
| ; CHECK-NEXT: sarq $63, %rax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i64 %a, -1 |
| %res = select i1 %c, i64 0, i64 -1 |
| ret i64 %res |
| } |
| |
; Same compare as the sign pattern (sgt -1), but the select arms are 1 / 0.
; The checks expect the inverted sign bit to be moved to bit 0:
; notq followed by a logical shift right by 63.
| define i64 @not_sign_i64_sgt_neg1_one_or_zero(i64 %a) { |
| ; CHECK-LABEL: not_sign_i64_sgt_neg1_one_or_zero: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq %rdi, %rax |
| ; CHECK-NEXT: notq %rax |
| ; CHECK-NEXT: shrq $63, %rax |
| ; CHECK-NEXT: retq |
| %c = icmp sgt i64 %a, -1 |
| %res = select i1 %c, i64 1, i64 0 |
| ret i64 %res |
| } |
| |
; Unsigned variant: nothing is unsigned-greater-than -1 (all ones), so the
; select always yields its false arm. The checks expect a constant -1 return.
| define i64 @not_sign_i64_ugt_neg1(i64 %a) { |
| ; CHECK-LABEL: not_sign_i64_ugt_neg1: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: movq $-1, %rax |
| ; CHECK-NEXT: retq |
| %c = icmp ugt i64 %a, -1 |
| %res = select i1 %c, i64 1, i64 -1 |
| ret i64 %res |
| } |
| |
; Sign pattern on a 7-element i8 vector (element count is not a power of two).
; In every run the expected code first assembles the vector from the integer
; register / stack arguments, then computes (0 > a) with pcmpgtb and ORs in a
; constant, and finally stores the 7 result bytes through the pointer that
; arrives in %rdi (also returned in %rax).
; The SSE2 runs pack the bytes with shifts/ORs and pinsrw; the AVX runs use
; vpinsrb / vpextrb.
| define <7 x i8> @sign_7xi8(<7 x i8> %a) { |
| ; CHECK-NOBMI-LABEL: sign_7xi8: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: movq %rdi, %rax |
| ; CHECK-NOBMI-NEXT: movl {{[0-9]+}}(%rsp), %edi |
| ; CHECK-NOBMI-NEXT: shll $8, %edi |
| ; CHECK-NOBMI-NEXT: movzbl %r9b, %r9d |
| ; CHECK-NOBMI-NEXT: orl %edi, %r9d |
| ; CHECK-NOBMI-NEXT: movzbl %sil, %esi |
| ; CHECK-NOBMI-NEXT: movzbl %dl, %edx |
| ; CHECK-NOBMI-NEXT: shll $8, %edx |
| ; CHECK-NOBMI-NEXT: orl %esi, %edx |
| ; CHECK-NOBMI-NEXT: movzbl %cl, %ecx |
| ; CHECK-NOBMI-NEXT: shll $16, %ecx |
| ; CHECK-NOBMI-NEXT: orl %edx, %ecx |
| ; CHECK-NOBMI-NEXT: shll $24, %r8d |
| ; CHECK-NOBMI-NEXT: orl %ecx, %r8d |
| ; CHECK-NOBMI-NEXT: movd %r8d, %xmm0 |
| ; CHECK-NOBMI-NEXT: pinsrw $2, %r9d, %xmm0 |
| ; CHECK-NOBMI-NEXT: pinsrw $3, {{[0-9]+}}(%rsp), %xmm0 |
| ; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1 |
| ; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1 |
| ; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; CHECK-NOBMI-NEXT: movd %xmm1, (%rax) |
| ; CHECK-NOBMI-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) |
| ; CHECK-NOBMI-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx |
| ; CHECK-NOBMI-NEXT: movb %cl, 6(%rax) |
| ; CHECK-NOBMI-NEXT: pextrw $2, %xmm1, %ecx |
| ; CHECK-NOBMI-NEXT: movw %cx, 4(%rax) |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: sign_7xi8: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: movq %rdi, %rax |
| ; CHECK-BMI2-SSE2-NEXT: movl {{[0-9]+}}(%rsp), %edi |
| ; CHECK-BMI2-SSE2-NEXT: shll $8, %edi |
| ; CHECK-BMI2-SSE2-NEXT: movzbl %r9b, %r9d |
| ; CHECK-BMI2-SSE2-NEXT: orl %edi, %r9d |
| ; CHECK-BMI2-SSE2-NEXT: movzbl %sil, %esi |
| ; CHECK-BMI2-SSE2-NEXT: movzbl %dl, %edx |
| ; CHECK-BMI2-SSE2-NEXT: shll $8, %edx |
| ; CHECK-BMI2-SSE2-NEXT: orl %esi, %edx |
| ; CHECK-BMI2-SSE2-NEXT: movzbl %cl, %ecx |
| ; CHECK-BMI2-SSE2-NEXT: shll $16, %ecx |
| ; CHECK-BMI2-SSE2-NEXT: orl %edx, %ecx |
| ; CHECK-BMI2-SSE2-NEXT: shll $24, %r8d |
| ; CHECK-BMI2-SSE2-NEXT: orl %ecx, %r8d |
| ; CHECK-BMI2-SSE2-NEXT: movd %r8d, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: pinsrw $2, %r9d, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: pinsrw $3, {{[0-9]+}}(%rsp), %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: movd %xmm1, (%rax) |
| ; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp) |
| ; CHECK-BMI2-SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx |
| ; CHECK-BMI2-SSE2-NEXT: movb %cl, 6(%rax) |
| ; CHECK-BMI2-SSE2-NEXT: pextrw $2, %xmm1, %ecx |
| ; CHECK-BMI2-SSE2-NEXT: movw %cx, 4(%rax) |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX12-LABEL: sign_7xi8: |
| ; CHECK-AVX12: # %bb.0: |
| ; CHECK-AVX12-NEXT: movq %rdi, %rax |
| ; CHECK-AVX12-NEXT: vmovd %esi, %xmm0 |
| ; CHECK-AVX12-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 |
| ; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: vpextrb $6, %xmm0, 6(%rdi) |
| ; CHECK-AVX12-NEXT: vpextrw $2, %xmm0, 4(%rdi) |
| ; CHECK-AVX12-NEXT: vmovd %xmm0, (%rdi) |
| ; CHECK-AVX12-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: sign_7xi8: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: movq %rdi, %rax |
| ; CHECK-AVX512-NEXT: vmovd %esi, %xmm0 |
| ; CHECK-AVX512-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpinsrb $3, %r8d, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpinsrb $4, %r9d, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpinsrb $5, {{[0-9]+}}(%rsp), %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpinsrb $6, {{[0-9]+}}(%rsp), %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 |
| ; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpextrb $6, %xmm0, 6(%rdi) |
| ; CHECK-AVX512-NEXT: vpextrw $2, %xmm0, 4(%rdi) |
| ; CHECK-AVX512-NEXT: vmovd %xmm0, (%rdi) |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <7 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %res = select <7 x i1> %c, <7 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <7 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| ret <7 x i8> %res |
| } |
| |
; Sign pattern on <8 x i8>, passed and returned in %xmm0.
; There is no byte-wide arithmetic shift in the expected code; instead the
; checks expect (0 > a) via pxor + pcmpgtb, then an OR with a constant-pool
; value (a {1to4} broadcast memory operand in the AVX512 run).
| define <8 x i8> @sign_8xi8(<8 x i8> %a) { |
| ; CHECK-NOBMI-LABEL: sign_8xi8: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1 |
| ; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1 |
| ; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0 |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: sign_8xi8: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX12-LABEL: sign_8xi8: |
| ; CHECK-AVX12: # %bb.0: |
| ; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 |
| ; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: sign_8xi8: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 |
| ; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %res = select <8 x i1> %c, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| ret <8 x i8> %res |
| } |
| |
; Sign pattern on a full 128-bit <16 x i8> vector.
; The expected code matches the <8 x i8> case: pxor + pcmpgtb to form
; (0 > a), then an OR with a constant-pool value.
| define <16 x i8> @sign_16xi8(<16 x i8> %a) { |
| ; CHECK-NOBMI-LABEL: sign_16xi8: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm1 |
| ; CHECK-NOBMI-NEXT: pcmpgtb %xmm0, %xmm1 |
| ; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0 |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: sign_16xi8: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: pcmpgtb %xmm0, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX12-LABEL: sign_16xi8: |
| ; CHECK-AVX12: # %bb.0: |
| ; CHECK-AVX12-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX12-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 |
| ; CHECK-AVX12-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX12-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: sign_16xi8: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 |
| ; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| %res = select <16 x i1> %c, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> |
| ret <16 x i8> %res |
| } |
| |
; Sign pattern on a 3-element i32 vector.
; The checks expect the same code as for <4 x i32>: psrad $31 followed by an
; OR with a splat of 1. How the splat is produced differs per run: a
; constant-pool operand (SSE2 / AVX1), vpbroadcastd into a register (AVX2),
; or a {1to4} broadcast memory operand (AVX512).
| define <3 x i32> @sign_3xi32(<3 x i32> %a) { |
| ; CHECK-NOBMI-LABEL: sign_3xi32: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: psrad $31, %xmm0 |
| ; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: sign_3xi32: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX1-LABEL: sign_3xi32: |
| ; CHECK-AVX1: # %bb.0: |
| ; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX1-NEXT: retq |
| ; |
| ; CHECK-AVX2-LABEL: sign_3xi32: |
| ; CHECK-AVX2: # %bb.0: |
| ; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 |
| ; CHECK-AVX2-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: sign_3xi32: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <3 x i32> %a, <i32 -1, i32 -1, i32 -1> |
| %res = select <3 x i1> %c, <3 x i32> <i32 1, i32 1, i32 1>, <3 x i32> <i32 -1, i32 -1, i32 -1> |
| ret <3 x i32> %res |
| } |
| |
; Sign pattern on <4 x i32>: select (icmp sgt %a, splat -1), splat 1, splat -1.
; Expected lowering is psrad $31 followed by an OR with a splat of 1. The
; splat comes from the constant pool (SSE2 / AVX1), from vpbroadcastd (AVX2),
; or from a {1to4} broadcast memory operand (AVX512).
| define <4 x i32> @sign_4xi32(<4 x i32> %a) { |
| ; CHECK-NOBMI-LABEL: sign_4xi32: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: psrad $31, %xmm0 |
| ; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: sign_4xi32: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX1-LABEL: sign_4xi32: |
| ; CHECK-AVX1: # %bb.0: |
| ; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX1-NEXT: retq |
| ; |
| ; CHECK-AVX2-LABEL: sign_4xi32: |
| ; CHECK-AVX2: # %bb.0: |
| ; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 |
| ; CHECK-AVX2-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: sign_4xi32: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> |
| ret <4 x i32> %res |
| } |
| |
; Sign pattern where the compare result has a second user: %c also feeds the
; call to @use_4xi1. The checks expect the select to still be lowered as
; psrad $31 + OR, while the compare is materialized separately
; (pcmpeqd to build all-ones, then pcmpgtd) as the call argument.
; The select result is spilled to the stack before the call and reloaded
; into %xmm0 afterwards as the return value.
| define <4 x i32> @sign_4xi32_multi_use(<4 x i32> %a) { |
| ; CHECK-NOBMI-LABEL: sign_4xi32_multi_use: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: subq $24, %rsp |
| ; CHECK-NOBMI-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm2 |
| ; CHECK-NOBMI-NEXT: psrad $31, %xmm2 |
| ; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 |
| ; CHECK-NOBMI-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill |
| ; CHECK-NOBMI-NEXT: pcmpgtd %xmm1, %xmm0 |
| ; CHECK-NOBMI-NEXT: callq use_4xi1@PLT |
| ; CHECK-NOBMI-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload |
| ; CHECK-NOBMI-NEXT: addq $24, %rsp |
| ; CHECK-NOBMI-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: sign_4xi32_multi_use: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: subq $24, %rsp |
| ; CHECK-BMI2-SSE2-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm2 |
| ; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm2 |
| ; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 |
| ; CHECK-BMI2-SSE2-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill |
| ; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm1, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: callq use_4xi1@PLT |
| ; CHECK-BMI2-SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload |
| ; CHECK-BMI2-SSE2-NEXT: addq $24, %rsp |
| ; CHECK-BMI2-SSE2-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX1-LABEL: sign_4xi32_multi_use: |
| ; CHECK-AVX1: # %bb.0: |
| ; CHECK-AVX1-NEXT: subq $24, %rsp |
| ; CHECK-AVX1-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm2 |
| ; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 |
| ; CHECK-AVX1-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill |
| ; CHECK-AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 |
| ; CHECK-AVX1-NEXT: callq use_4xi1@PLT |
| ; CHECK-AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload |
| ; CHECK-AVX1-NEXT: addq $24, %rsp |
| ; CHECK-AVX1-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-AVX1-NEXT: retq |
| ; |
| ; CHECK-AVX2-LABEL: sign_4xi32_multi_use: |
| ; CHECK-AVX2: # %bb.0: |
| ; CHECK-AVX2-NEXT: subq $24, %rsp |
| ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm2 |
| ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1,1,1,1] |
| ; CHECK-AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2 |
| ; CHECK-AVX2-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill |
| ; CHECK-AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 |
| ; CHECK-AVX2-NEXT: callq use_4xi1@PLT |
| ; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload |
| ; CHECK-AVX2-NEXT: addq $24, %rsp |
| ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-AVX2-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: sign_4xi32_multi_use: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: subq $24, %rsp |
| ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 32 |
| ; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX512-NEXT: vpsrad $31, %xmm0, %xmm2 |
| ; CHECK-AVX512-NEXT: vpord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm2, %xmm2 |
| ; CHECK-AVX512-NEXT: vmovdqa %xmm2, (%rsp) # 16-byte Spill |
| ; CHECK-AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: callq use_4xi1@PLT |
| ; CHECK-AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload |
| ; CHECK-AVX512-NEXT: addq $24, %rsp |
| ; CHECK-AVX512-NEXT: .cfi_def_cfa_offset 8 |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> |
| call void @use_4xi1(<4 x i1> %c) |
| ret <4 x i32> %res |
| } |
| |
| ; Second icmp operand breaks sign pattern. |
; The compare constant is <1, -1, -1, -1>, i.e. lane 0 is not -1, so no
; psrad is expected. The SSE2/AVX1/AVX2 runs expect pcmpgtd against a
; constant-pool vector, an inversion (pxor with all-ones) and an OR with 1;
; the AVX512 run expects a compare into %k1 and a masked broadcast of 1 over
; an all-ones register.
| define <4 x i32> @not_sign_4xi32(<4 x i32> %a) { |
| ; CHECK-NOBMI-LABEL: not_sign_4xi32: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0 |
| ; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX1-LABEL: not_sign_4xi32: |
| ; CHECK-AVX1: # %bb.0: |
| ; CHECK-AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0 |
| ; CHECK-AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX1-NEXT: retq |
| ; |
| ; CHECK-AVX2-LABEL: not_sign_4xi32: |
| ; CHECK-AVX2: # %bb.0: |
| ; CHECK-AVX2-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0 |
| ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1] |
| ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 |
| ; CHECK-AVX2-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: not_sign_4xi32: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1 |
| ; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k1} = [1,1,1,1] |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <4 x i32> %a, <i32 1, i32 -1, i32 -1, i32 -1> |
| %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> |
| ret <4 x i32> %res |
| } |
| |
| ; First select operand breaks sign pattern. |
; The true arm is <1, 1, -1, 1> instead of a splat of 1. Note that the
; recorded checks still expect psrad $31 followed by an OR with a
; constant-pool vector in every run (all AVX runs share the CHECK-AVX prefix),
; so only the OR constant differs from the plain sign pattern.
| define <4 x i32> @not_sign_4xi32_select_true_breaks_pattern(<4 x i32> %a) { |
| ; CHECK-NOBMI-LABEL: not_sign_4xi32_select_true_breaks_pattern: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: psrad $31, %xmm0 |
| ; CHECK-NOBMI-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32_select_true_breaks_pattern: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: psrad $31, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX-LABEL: not_sign_4xi32_select_true_breaks_pattern: |
| ; CHECK-AVX: # %bb.0: |
| ; CHECK-AVX-NEXT: vpsrad $31, %xmm0, %xmm0 |
| ; CHECK-AVX-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 |
| ; CHECK-AVX-NEXT: retq |
| %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 -1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> |
| ret <4 x i32> %res |
| } |
| |
| ; Second select operand breaks sign pattern. |
; The false arm is <-1, -1, -1, 1> instead of a splat of -1, so psrad + OR
; is not expected. The SSE2 runs expect an explicit compare mask combined
; with pandn / psubd; AVX1/AVX2 expect a vblendvps keyed on the input's sign
; bits; AVX512 expects a compare into %k1 and a masked broadcast of 1 over
; the false-arm constant.
| define <4 x i32> @not_sign_4xi32_select_false_breaks_pattern(<4 x i32> %a) { |
| ; CHECK-NOBMI-LABEL: not_sign_4xi32_select_false_breaks_pattern: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; CHECK-NOBMI-NEXT: pcmpgtd %xmm1, %xmm0 |
| ; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1 |
| ; CHECK-NOBMI-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; CHECK-NOBMI-NEXT: psubd %xmm0, %xmm1 |
| ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm0 |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: not_sign_4xi32_select_false_breaks_pattern: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: pcmpgtd %xmm1, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: psubd %xmm0, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX12-LABEL: not_sign_4xi32_select_false_breaks_pattern: |
| ; CHECK-AVX12: # %bb.0: |
| ; CHECK-AVX12-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] |
| ; CHECK-AVX12-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0 |
| ; CHECK-AVX12-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: not_sign_4xi32_select_false_breaks_pattern: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 |
| ; CHECK-AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k1 |
| ; CHECK-AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4294967295,4294967295,4294967295,1] |
| ; CHECK-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 {%k1} = [1,1,1,1] |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1> |
| %res = select <4 x i1> %c, <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 1> |
| ret <4 x i32> %res |
| } |
| |
| ; i65 is not legal. |
; Sign pattern on a vector of 65-bit integers. In every run the result is
; written through the pointer that arrives in %rdi (copied to %rax): four
; quadword stores at offsets 0, 8, 16 and 24 plus one byte at offset 32.
; The non-AVX512 runs expect andl $1 / negq / shldq style integer sequences
; (the SSE2 runs additionally move one value through xmm registers); the
; AVX512 run builds a k-register mask and selects with a masked zmm move
; before extracting and repacking the pieces.
| define <4 x i65> @sign_4xi65(<4 x i65> %a) { |
| ; CHECK-NOBMI-LABEL: sign_4xi65: |
| ; CHECK-NOBMI: # %bb.0: |
| ; CHECK-NOBMI-NEXT: movq %rdi, %rax |
| ; CHECK-NOBMI-NEXT: movq {{[0-9]+}}(%rsp), %rcx |
| ; CHECK-NOBMI-NEXT: andl $1, %ecx |
| ; CHECK-NOBMI-NEXT: movq %rcx, %rsi |
| ; CHECK-NOBMI-NEXT: negq %rsi |
| ; CHECK-NOBMI-NEXT: movq {{[0-9]+}}(%rsp), %rdi |
| ; CHECK-NOBMI-NEXT: andl $1, %edi |
| ; CHECK-NOBMI-NEXT: movq %rdi, %r10 |
| ; CHECK-NOBMI-NEXT: negq %r10 |
| ; CHECK-NOBMI-NEXT: andl $1, %edx |
| ; CHECK-NOBMI-NEXT: negq %rdx |
| ; CHECK-NOBMI-NEXT: andl $1, %r8d |
| ; CHECK-NOBMI-NEXT: negq %r8 |
| ; CHECK-NOBMI-NEXT: leaq (%r8,%r8), %r9 |
| ; CHECK-NOBMI-NEXT: movq %rdx, %xmm0 |
| ; CHECK-NOBMI-NEXT: orq $1, %rdx |
| ; CHECK-NOBMI-NEXT: movq %rdx, (%rax) |
| ; CHECK-NOBMI-NEXT: andl $15, %r10d |
| ; CHECK-NOBMI-NEXT: movb %r10b, 32(%rax) |
| ; CHECK-NOBMI-NEXT: movl %esi, %r10d |
| ; CHECK-NOBMI-NEXT: andl $1, %r10d |
| ; CHECK-NOBMI-NEXT: shldq $2, %rsi, %r10 |
| ; CHECK-NOBMI-NEXT: shll $3, %edi |
| ; CHECK-NOBMI-NEXT: subq %rdi, %r10 |
| ; CHECK-NOBMI-NEXT: orq $8, %r10 |
| ; CHECK-NOBMI-NEXT: movq %r10, 24(%rax) |
| ; CHECK-NOBMI-NEXT: movl %r8d, %esi |
| ; CHECK-NOBMI-NEXT: andl $1, %esi |
| ; CHECK-NOBMI-NEXT: shldq $1, %r8, %rsi |
| ; CHECK-NOBMI-NEXT: shll $2, %ecx |
| ; CHECK-NOBMI-NEXT: subq %rcx, %rsi |
| ; CHECK-NOBMI-NEXT: orq $4, %rsi |
| ; CHECK-NOBMI-NEXT: movq %rsi, 16(%rax) |
| ; CHECK-NOBMI-NEXT: movq %rdx, %xmm1 |
| ; CHECK-NOBMI-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] |
| ; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] |
| ; CHECK-NOBMI-NEXT: movq %xmm0, %rcx |
| ; CHECK-NOBMI-NEXT: orq $2, %r9 |
| ; CHECK-NOBMI-NEXT: subq %rcx, %r9 |
| ; CHECK-NOBMI-NEXT: movq %r9, 8(%rax) |
| ; CHECK-NOBMI-NEXT: retq |
| ; |
| ; CHECK-BMI2-SSE2-LABEL: sign_4xi65: |
| ; CHECK-BMI2-SSE2: # %bb.0: |
| ; CHECK-BMI2-SSE2-NEXT: movq %rdi, %rax |
| ; CHECK-BMI2-SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx |
| ; CHECK-BMI2-SSE2-NEXT: andl $1, %ecx |
| ; CHECK-BMI2-SSE2-NEXT: movq %rcx, %rsi |
| ; CHECK-BMI2-SSE2-NEXT: negq %rsi |
| ; CHECK-BMI2-SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rdi |
| ; CHECK-BMI2-SSE2-NEXT: andl $1, %edi |
| ; CHECK-BMI2-SSE2-NEXT: movq %rdi, %r10 |
| ; CHECK-BMI2-SSE2-NEXT: negq %r10 |
| ; CHECK-BMI2-SSE2-NEXT: andl $1, %edx |
| ; CHECK-BMI2-SSE2-NEXT: negq %rdx |
| ; CHECK-BMI2-SSE2-NEXT: andl $1, %r8d |
| ; CHECK-BMI2-SSE2-NEXT: negq %r8 |
| ; CHECK-BMI2-SSE2-NEXT: leaq (%r8,%r8), %r9 |
| ; CHECK-BMI2-SSE2-NEXT: movq %rdx, %xmm0 |
| ; CHECK-BMI2-SSE2-NEXT: orq $1, %rdx |
| ; CHECK-BMI2-SSE2-NEXT: movq %rdx, (%rax) |
| ; CHECK-BMI2-SSE2-NEXT: andl $15, %r10d |
| ; CHECK-BMI2-SSE2-NEXT: movb %r10b, 32(%rax) |
| ; CHECK-BMI2-SSE2-NEXT: movl %esi, %r10d |
| ; CHECK-BMI2-SSE2-NEXT: andl $1, %r10d |
| ; CHECK-BMI2-SSE2-NEXT: shldq $2, %rsi, %r10 |
| ; CHECK-BMI2-SSE2-NEXT: shll $3, %edi |
| ; CHECK-BMI2-SSE2-NEXT: subq %rdi, %r10 |
| ; CHECK-BMI2-SSE2-NEXT: orq $8, %r10 |
| ; CHECK-BMI2-SSE2-NEXT: movq %r10, 24(%rax) |
| ; CHECK-BMI2-SSE2-NEXT: movl %r8d, %esi |
| ; CHECK-BMI2-SSE2-NEXT: andl $1, %esi |
| ; CHECK-BMI2-SSE2-NEXT: shldq $1, %r8, %rsi |
| ; CHECK-BMI2-SSE2-NEXT: shll $2, %ecx |
| ; CHECK-BMI2-SSE2-NEXT: subq %rcx, %rsi |
| ; CHECK-BMI2-SSE2-NEXT: orq $4, %rsi |
| ; CHECK-BMI2-SSE2-NEXT: movq %rsi, 16(%rax) |
| ; CHECK-BMI2-SSE2-NEXT: movq %rdx, %xmm1 |
| ; CHECK-BMI2-SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] |
| ; CHECK-BMI2-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] |
| ; CHECK-BMI2-SSE2-NEXT: movq %xmm0, %rcx |
| ; CHECK-BMI2-SSE2-NEXT: orq $2, %r9 |
| ; CHECK-BMI2-SSE2-NEXT: subq %rcx, %r9 |
| ; CHECK-BMI2-SSE2-NEXT: movq %r9, 8(%rax) |
| ; CHECK-BMI2-SSE2-NEXT: retq |
| ; |
| ; CHECK-AVX12-LABEL: sign_4xi65: |
| ; CHECK-AVX12: # %bb.0: |
| ; CHECK-AVX12-NEXT: movq %rdi, %rax |
| ; CHECK-AVX12-NEXT: andl $1, %r8d |
| ; CHECK-AVX12-NEXT: movq %r8, %rsi |
| ; CHECK-AVX12-NEXT: negq %rsi |
| ; CHECK-AVX12-NEXT: movq {{[0-9]+}}(%rsp), %rcx |
| ; CHECK-AVX12-NEXT: andl $1, %ecx |
| ; CHECK-AVX12-NEXT: movq %rcx, %r9 |
| ; CHECK-AVX12-NEXT: negq %r9 |
| ; CHECK-AVX12-NEXT: movq {{[0-9]+}}(%rsp), %rdi |
| ; CHECK-AVX12-NEXT: andl $1, %edi |
| ; CHECK-AVX12-NEXT: movq %rdi, %r10 |
| ; CHECK-AVX12-NEXT: negq %r10 |
| ; CHECK-AVX12-NEXT: andl $1, %edx |
| ; CHECK-AVX12-NEXT: movq %rdx, %r11 |
| ; CHECK-AVX12-NEXT: negq %r11 |
| ; CHECK-AVX12-NEXT: orq $1, %r11 |
| ; CHECK-AVX12-NEXT: movq %r11, (%rax) |
| ; CHECK-AVX12-NEXT: andl $15, %r10d |
| ; CHECK-AVX12-NEXT: movb %r10b, 32(%rax) |
| ; CHECK-AVX12-NEXT: addl %r8d, %r8d |
| ; CHECK-AVX12-NEXT: subq %r8, %rdx |
| ; CHECK-AVX12-NEXT: orq $2, %rdx |
| ; CHECK-AVX12-NEXT: movq %rdx, 8(%rax) |
| ; CHECK-AVX12-NEXT: movl %r9d, %edx |
| ; CHECK-AVX12-NEXT: andl $1, %edx |
| ; CHECK-AVX12-NEXT: shldq $2, %r9, %rdx |
| ; CHECK-AVX12-NEXT: shll $3, %edi |
| ; CHECK-AVX12-NEXT: subq %rdi, %rdx |
| ; CHECK-AVX12-NEXT: orq $8, %rdx |
| ; CHECK-AVX12-NEXT: movq %rdx, 24(%rax) |
| ; CHECK-AVX12-NEXT: movl %esi, %edx |
| ; CHECK-AVX12-NEXT: andl $1, %edx |
| ; CHECK-AVX12-NEXT: shldq $1, %rsi, %rdx |
| ; CHECK-AVX12-NEXT: shll $2, %ecx |
| ; CHECK-AVX12-NEXT: subq %rcx, %rdx |
| ; CHECK-AVX12-NEXT: orq $4, %rdx |
| ; CHECK-AVX12-NEXT: movq %rdx, 16(%rax) |
| ; CHECK-AVX12-NEXT: retq |
| ; |
| ; CHECK-AVX512-LABEL: sign_4xi65: |
| ; CHECK-AVX512: # %bb.0: |
| ; CHECK-AVX512-NEXT: movq %rdi, %rax |
| ; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rcx |
| ; CHECK-AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rsi |
| ; CHECK-AVX512-NEXT: andl $1, %r8d |
| ; CHECK-AVX512-NEXT: andl $1, %edx |
| ; CHECK-AVX512-NEXT: andl $1, %esi |
| ; CHECK-AVX512-NEXT: andl $1, %ecx |
| ; CHECK-AVX512-NEXT: xorl %edi, %edi |
| ; CHECK-AVX512-NEXT: cmpq $1, %rcx |
| ; CHECK-AVX512-NEXT: movl $0, %ecx |
| ; CHECK-AVX512-NEXT: sbbl %ecx, %ecx |
| ; CHECK-AVX512-NEXT: kmovw %ecx, %k0 |
| ; CHECK-AVX512-NEXT: cmpq $1, %rsi |
| ; CHECK-AVX512-NEXT: movl $0, %ecx |
| ; CHECK-AVX512-NEXT: sbbl %ecx, %ecx |
| ; CHECK-AVX512-NEXT: kmovw %ecx, %k1 |
| ; CHECK-AVX512-NEXT: cmpq $1, %rdx |
| ; CHECK-AVX512-NEXT: movl $0, %ecx |
| ; CHECK-AVX512-NEXT: sbbl %ecx, %ecx |
| ; CHECK-AVX512-NEXT: kmovw %ecx, %k2 |
| ; CHECK-AVX512-NEXT: cmpq $1, %r8 |
| ; CHECK-AVX512-NEXT: sbbl %edi, %edi |
| ; CHECK-AVX512-NEXT: kmovw %edi, %k3 |
| ; CHECK-AVX512-NEXT: kshiftlw $4, %k3, %k3 |
| ; CHECK-AVX512-NEXT: kshiftlw $12, %k2, %k2 |
| ; CHECK-AVX512-NEXT: kshiftrw $12, %k2, %k2 |
| ; CHECK-AVX512-NEXT: korw %k3, %k2, %k2 |
| ; CHECK-AVX512-NEXT: kshiftlw $4, %k1, %k1 |
| ; CHECK-AVX512-NEXT: kshiftlw $12, %k0, %k0 |
| ; CHECK-AVX512-NEXT: kshiftrw $12, %k0, %k0 |
| ; CHECK-AVX512-NEXT: korw %k1, %k0, %k0 |
| ; CHECK-AVX512-NEXT: kunpckbw %k2, %k0, %k1 |
| ; CHECK-AVX512-NEXT: vpmovsxbq {{.*#+}} zmm0 = [18446744073709551615,1,18446744073709551615,1,18446744073709551615,1,18446744073709551615,1] |
| ; CHECK-AVX512-NEXT: vmovdqa32 {{.*#+}} zmm0 {%k1} = [1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0] |
| ; CHECK-AVX512-NEXT: vmovq %xmm0, (%rax) |
| ; CHECK-AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm1 |
| ; CHECK-AVX512-NEXT: vmovq %xmm1, %rcx |
| ; CHECK-AVX512-NEXT: vpextrq $1, %xmm1, %rdx |
| ; CHECK-AVX512-NEXT: shldq $2, %rcx, %rdx |
| ; CHECK-AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm1 |
| ; CHECK-AVX512-NEXT: vmovq %xmm1, %rsi |
| ; CHECK-AVX512-NEXT: leaq (%rdx,%rsi,8), %rdx |
| ; CHECK-AVX512-NEXT: movq %rdx, 24(%rax) |
| ; CHECK-AVX512-NEXT: vpextrq $1, %xmm0, %rdx |
| ; CHECK-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 |
| ; CHECK-AVX512-NEXT: vmovq %xmm0, %rdi |
| ; CHECK-AVX512-NEXT: leaq (%rdx,%rdi,2), %rdx |
| ; CHECK-AVX512-NEXT: movq %rdx, 8(%rax) |
| ; CHECK-AVX512-NEXT: vpextrq $1, %xmm0, %rdx |
| ; CHECK-AVX512-NEXT: shldq $1, %rdi, %rdx |
| ; CHECK-AVX512-NEXT: leaq (%rdx,%rcx,4), %rcx |
| ; CHECK-AVX512-NEXT: movq %rcx, 16(%rax) |
| ; CHECK-AVX512-NEXT: vpextrq $1, %xmm1, %rcx |
| ; CHECK-AVX512-NEXT: shldq $3, %rsi, %rcx |
| ; CHECK-AVX512-NEXT: andl $15, %ecx |
| ; CHECK-AVX512-NEXT: movb %cl, 32(%rax) |
| ; CHECK-AVX512-NEXT: vzeroupper |
| ; CHECK-AVX512-NEXT: retq |
| %c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1> |
| %res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65 > <i65 -1, i65 -1, i65 -1, i65 -1> |
| ret <4 x i65> %res |
| } |
| |
; Signed compare of a negated value: zext ((0 - (x | 1)) sgt y).
; Because of the OR with 1 the negated operand is odd, so it is neither 0 nor
; INT_MIN. The checks expect a straightforward orl / negl / cmpl / setg
; sequence.
; NOTE(review): presumably the base case for a neg-compare fold that the
; sibling *_no_smin / *_no_zero tests restrict - confirm against the
; originating patch.
| define i32 @or_neg(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orl $1, %edi |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setg %al |
| ; CHECK-NEXT: retq |
| %3 = or i32 %x, 1 |
| %4 = sub i32 0, %3 |
| %5 = icmp sgt i32 %4, %y |
| %6 = zext i1 %5 to i32 |
| ret i32 %6 |
| } |
| |
| ; Unsigned greater-than variant: compares 0 - (%x | 1) against %y with ugt and |
| ; zero-extends the i1 result. The `or` with 1 keeps the negated operand nonzero. |
| define i32 @or_neg_ugt(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_ugt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orl $1, %edi |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: seta %al |
| ; CHECK-NEXT: retq |
| %odd = or i32 %x, 1 |
| %neg = sub i32 0, %odd |
| %cmp = icmp ugt i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; Negative test |
| |
| ; %x is negated directly (no `or` beforehand), so nothing in this function rules |
| ; out %x == INT_MIN before the signed compare. |
| define i32 @or_neg_no_smin(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_no_smin: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setg %al |
| ; CHECK-NEXT: retq |
| %neg = sub i32 0, %x |
| %cmp = icmp sgt i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; Negative test |
| |
| ; Unsigned less-than of 0 - %x against %y. %x is unconstrained here, so it may |
| ; be zero. |
| define i32 @or_neg_ult_no_zero(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_ult_no_zero: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setb %al |
| ; CHECK-NEXT: retq |
| %neg = sub i32 0, %x |
| %cmp = icmp ult i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; The negated operand is smax(%x, 0): it is never negative (so never INT_MIN), |
| ; but it can still be 0. Result of the signed-greater-than compare is widened |
| ; to i32. |
| define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_no_smin_but_zero: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xorl %ecx, %ecx |
| ; CHECK-NEXT: testl %edi, %edi |
| ; CHECK-NEXT: cmovgl %edi, %ecx |
| ; CHECK-NEXT: negl %ecx |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %ecx |
| ; CHECK-NEXT: setg %al |
| ; CHECK-NEXT: retq |
| %nonneg = call i32 @llvm.smax.i32(i32 %x, i32 0) |
| %neg = sub i32 0, %nonneg |
| %cmp = icmp sgt i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; The negated operand is umin(%x, 9), i.e. a value in [0, 9]: it cannot be |
| ; INT_MIN but may be 0. The negation is then compared against %y with ugt. |
| define i32 @or_neg_slt_zero_but_no_smin(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_slt_zero_but_no_smin: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: cmpl $9, %edi |
| ; CHECK-NEXT: movl $9, %ecx |
| ; CHECK-NEXT: cmovbl %edi, %ecx |
| ; CHECK-NEXT: negl %ecx |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %ecx |
| ; CHECK-NEXT: seta %al |
| ; CHECK-NEXT: retq |
| %small = call i32 @llvm.umin.i32(i32 %x, i32 9) |
| %neg = sub i32 0, %small |
| %cmp = icmp ugt i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; Signed greater-or-equal variant: 0 - (%x | 1) is compared against %y with sge. |
| ; (%x | 1) is odd, hence neither 0 nor INT_MIN. |
| define i32 @or_neg_sge(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_sge: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orl $1, %edi |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setge %al |
| ; CHECK-NEXT: retq |
| %odd = or i32 %x, 1 |
| %neg = sub i32 0, %odd |
| %cmp = icmp sge i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; Signed less-than variant: 0 - (%x | 1) is compared against %y with slt. |
| ; (%x | 1) is odd, hence neither 0 nor INT_MIN. |
| define i32 @or_neg_slt(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_slt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orl $1, %edi |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setl %al |
| ; CHECK-NEXT: retq |
| %odd = or i32 %x, 1 |
| %neg = sub i32 0, %odd |
| %cmp = icmp slt i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; Signed less-or-equal variant: 0 - (%x | 1) is compared against %y with sle. |
| ; (%x | 1) is odd, hence neither 0 nor INT_MIN. |
| define i32 @or_neg_sle(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_sle: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orl $1, %edi |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setle %al |
| ; CHECK-NEXT: retq |
| %odd = or i32 %x, 1 |
| %neg = sub i32 0, %odd |
| %cmp = icmp sle i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; Unsigned less-than variant: 0 - (%x | 1) is compared against %y with ult and |
| ; the i1 result is zero-extended. The `or` with 1 keeps the negated operand |
| ; nonzero. |
| ; NOTE(review): this test previously used `icmp ugt`, making it an exact copy of |
| ; or_neg_ugt; the predicate now matches the function name. The assertions below |
| ; were adjusted by hand (seta -> setb) - rerun utils/update_llc_test_checks.py |
| ; to confirm. |
| define i32 @or_neg_ult(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_ult: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: orl $1, %edi |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setb %al |
| ; CHECK-NEXT: retq |
| %3 = or i32 %x, 1 |
| %4 = sub i32 0, %3 |
| %5 = icmp ult i32 %4, %y |
| %6 = zext i1 %5 to i32 |
| ret i32 %6 |
| } |
| |
| ; Signed greater-or-equal of 0 - %x against %y. %x is negated directly (no `or` |
| ; beforehand), so nothing in this function rules out %x == INT_MIN. |
| define i32 @or_neg_no_smin_sge(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_no_smin_sge: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setge %al |
| ; CHECK-NEXT: retq |
| %neg = sub i32 0, %x |
| %cmp = icmp sge i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; Negative test |
| |
| ; Unsigned less-than of 0 - %x against %y, with %x unconstrained (it may be 0). |
| ; The IR body is the same as or_neg_ult_no_zero under a different symbol name. |
| define i32 @or_neg_ult_no_zero_alt(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_ult_no_zero_alt: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: negl %edi |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %edi |
| ; CHECK-NEXT: setb %al |
| ; CHECK-NEXT: retq |
| %neg = sub i32 0, %x |
| %cmp = icmp ult i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; The negated operand is smax(%x, 0): never negative (so never INT_MIN), but |
| ; possibly 0. The negation is compared against %y with sle and widened to i32. |
| define i32 @or_neg_no_smin_but_zero_sle(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_no_smin_but_zero_sle: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: xorl %ecx, %ecx |
| ; CHECK-NEXT: testl %edi, %edi |
| ; CHECK-NEXT: cmovgl %edi, %ecx |
| ; CHECK-NEXT: negl %ecx |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %ecx |
| ; CHECK-NEXT: setle %al |
| ; CHECK-NEXT: retq |
| %nonneg = call i32 @llvm.smax.i32(i32 %x, i32 0) |
| %neg = sub i32 0, %nonneg |
| %cmp = icmp sle i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; The negated operand is umin(%x, 9), i.e. a value in [0, 9]: it cannot be |
| ; INT_MIN but may be 0. The negation is compared against %y with uge. |
| define i32 @or_neg_slt_zero_but_no_smin_uge(i32 %x, i32 %y) { |
| ; CHECK-LABEL: or_neg_slt_zero_but_no_smin_uge: |
| ; CHECK: # %bb.0: |
| ; CHECK-NEXT: cmpl $9, %edi |
| ; CHECK-NEXT: movl $9, %ecx |
| ; CHECK-NEXT: cmovbl %edi, %ecx |
| ; CHECK-NEXT: negl %ecx |
| ; CHECK-NEXT: xorl %eax, %eax |
| ; CHECK-NEXT: cmpl %esi, %ecx |
| ; CHECK-NEXT: setae %al |
| ; CHECK-NEXT: retq |
| %small = call i32 @llvm.umin.i32(i32 %x, i32 9) |
| %neg = sub i32 0, %small |
| %cmp = icmp uge i32 %neg, %y |
| %ext = zext i1 %cmp to i32 |
| ret i32 %ext |
| } |
| |
| ; Intrinsics and external helpers referenced by the tests in this file. |
| declare i32 @llvm.smax.i32(i32, i32) |
| declare i32 @llvm.umax.i32(i32, i32) |
| ; llvm.umin.i32 is called by the umin-based or_neg_* tests; declare it |
| ; explicitly rather than relying on the parser to auto-declare intrinsics. |
| declare i32 @llvm.umin.i32(i32, i32) |
| declare void @use_4xi1(<4 x i1>) |
| ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: |
| ; CHECK-BMI2: {{.*}} |
| ; CHECK-NOBMI-SSE2: {{.*}} |