| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86 |
| ; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64 |
| |
| ; Optimize expanded SRL/SHL used as an input of |
| ; SETCC comparing it with zero by removing rotation. |
| ; |
| ; See https://bugs.llvm.org/show_bug.cgi?id=50197 |
| ; |
| ; Loop form of the pattern: %inc is incremented until it compares |
| ; unsigned-less-than 1152921504606846976 (2^60), and is then returned. |
| ; The x86_64 checks expect the compare to become a test of the bits at |
| ; position 60 and above: one half shifted right by 60 (shrq) and ORed |
| ; with the other half (orq), followed by jne back to the loop. |
| ; The i686 checks expect the same test built from shldl/shrdl/orl on the |
| ; 32-bit parts. |
| define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind { |
| ; X86-LABEL: opt_setcc_lt_power_of_2: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebp |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X86-NEXT: .p2align 4, 0x90 |
| ; X86-NEXT: .LBB0_1: # %loop |
| ; X86-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X86-NEXT: addl $1, %edi |
| ; X86-NEXT: adcl $0, %esi |
| ; X86-NEXT: adcl $0, %edx |
| ; X86-NEXT: adcl $0, %ecx |
| ; X86-NEXT: movl %ecx, %ebx |
| ; X86-NEXT: shldl $4, %edx, %ebx |
| ; X86-NEXT: movl %esi, %ebp |
| ; X86-NEXT: orl %ecx, %ebp |
| ; X86-NEXT: shrdl $28, %edx, %ebp |
| ; X86-NEXT: orl %ebx, %ebp |
| ; X86-NEXT: jne .LBB0_1 |
| ; X86-NEXT: # %bb.2: # %exit |
| ; X86-NEXT: movl %edi, (%eax) |
| ; X86-NEXT: movl %esi, 4(%eax) |
| ; X86-NEXT: movl %edx, 8(%eax) |
| ; X86-NEXT: movl %ecx, 12(%eax) |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: popl %ebp |
| ; X86-NEXT: retl $4 |
| ; |
| ; X64-LABEL: opt_setcc_lt_power_of_2: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movq %rsi, %rdx |
| ; X64-NEXT: movq %rdi, %rax |
| ; X64-NEXT: .p2align 4, 0x90 |
| ; X64-NEXT: .LBB0_1: # %loop |
| ; X64-NEXT: # =>This Inner Loop Header: Depth=1 |
| ; X64-NEXT: addq $1, %rax |
| ; X64-NEXT: adcq $0, %rdx |
| ; X64-NEXT: movq %rax, %rcx |
| ; X64-NEXT: shrq $60, %rcx |
| ; X64-NEXT: orq %rdx, %rcx |
| ; X64-NEXT: jne .LBB0_1 |
| ; X64-NEXT: # %bb.2: # %exit |
| ; X64-NEXT: retq |
| br label %loop |
| |
| loop: |
| %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ] |
| %inc = add i128 %phi.a, 1 |
| ; The constant below is 2^60, so the compare only depends on bits 60 and up. |
| %cmp = icmp ult i128 %inc, 1152921504606846976 |
| br i1 %cmp, label %exit, label %loop |
| |
| exit: |
| ret i128 %inc |
| } |
| |
| ; Tests `icmp eq (lshr i128 %a, 17), 0`, where the compare is the only user |
| ; of the shift. The x86_64 checks expect a single shrq by 17 on one 64-bit |
| ; half, an orq with the other (unshifted) half, and sete; no shrdq appears. |
| define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind { |
| ; X86-LABEL: opt_setcc_srl_eq_zero: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: shrdl $17, %ecx, %eax |
| ; X86-NEXT: orl %esi, %ecx |
| ; X86-NEXT: shldl $15, %edx, %esi |
| ; X86-NEXT: orl %esi, %eax |
| ; X86-NEXT: shrdl $17, %edx, %ecx |
| ; X86-NEXT: orl %eax, %ecx |
| ; X86-NEXT: sete %al |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: opt_setcc_srl_eq_zero: |
| ; X64: # %bb.0: |
| ; X64-NEXT: shrq $17, %rdi |
| ; X64-NEXT: orq %rsi, %rdi |
| ; X64-NEXT: sete %al |
| ; X64-NEXT: retq |
| %srl = lshr i128 %a, 17 |
| %cmp = icmp eq i128 %srl, 0 |
| ret i1 %cmp |
| } |
| |
| ; Same as opt_setcc_srl_eq_zero but with `icmp ne`: tests |
| ; `icmp ne (lshr i128 %a, 17), 0`. The expected code is identical except |
| ; that the final instruction is setne instead of sete. |
| define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind { |
| ; X86-LABEL: opt_setcc_srl_ne_zero: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: shrdl $17, %ecx, %eax |
| ; X86-NEXT: orl %esi, %ecx |
| ; X86-NEXT: shldl $15, %edx, %esi |
| ; X86-NEXT: orl %esi, %eax |
| ; X86-NEXT: shrdl $17, %edx, %ecx |
| ; X86-NEXT: orl %eax, %ecx |
| ; X86-NEXT: setne %al |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: opt_setcc_srl_ne_zero: |
| ; X64: # %bb.0: |
| ; X64-NEXT: shrq $17, %rdi |
| ; X64-NEXT: orq %rsi, %rdi |
| ; X64-NEXT: setne %al |
| ; X64-NEXT: retq |
| %srl = lshr i128 %a, 17 |
| %cmp = icmp ne i128 %srl, 0 |
| ret i1 %cmp |
| } |
| |
| ; Left-shift counterpart of opt_setcc_srl_eq_zero: tests |
| ; `icmp eq (shl i128 %a, 17), 0`, where the compare is the only user of the |
| ; shift. The x86_64 checks expect a single shlq by 17 on one 64-bit half, an |
| ; orq with the other (unshifted) half, and sete; no shldq appears. |
| define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind { |
| ; X86-LABEL: opt_setcc_shl_eq_zero: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: shldl $17, %edx, %esi |
| ; X86-NEXT: orl %eax, %edx |
| ; X86-NEXT: shldl $17, %ecx, %edx |
| ; X86-NEXT: shldl $17, %eax, %ecx |
| ; X86-NEXT: orl %esi, %ecx |
| ; X86-NEXT: orl %ecx, %edx |
| ; X86-NEXT: sete %al |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: opt_setcc_shl_eq_zero: |
| ; X64: # %bb.0: |
| ; X64-NEXT: shlq $17, %rsi |
| ; X64-NEXT: orq %rdi, %rsi |
| ; X64-NEXT: sete %al |
| ; X64-NEXT: retq |
| %shl = shl i128 %a, 17 |
| %cmp = icmp eq i128 %shl, 0 |
| ret i1 %cmp |
| } |
| |
| ; Same as opt_setcc_shl_eq_zero but with `icmp ne`: tests |
| ; `icmp ne (shl i128 %a, 17), 0`. The expected code is identical except |
| ; that the final instruction is setne instead of sete. |
| define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind { |
| ; X86-LABEL: opt_setcc_shl_ne_zero: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: shldl $17, %edx, %esi |
| ; X86-NEXT: orl %eax, %edx |
| ; X86-NEXT: shldl $17, %ecx, %edx |
| ; X86-NEXT: shldl $17, %eax, %ecx |
| ; X86-NEXT: orl %esi, %ecx |
| ; X86-NEXT: orl %ecx, %edx |
| ; X86-NEXT: setne %al |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: opt_setcc_shl_ne_zero: |
| ; X64: # %bb.0: |
| ; X64-NEXT: shlq $17, %rsi |
| ; X64-NEXT: orq %rdi, %rsi |
| ; X64-NEXT: setne %al |
| ; X64-NEXT: retq |
| %shl = shl i128 %a, 17 |
| %cmp = icmp ne i128 %shl, 0 |
| ret i1 %cmp |
| } |
| |
| ; Negative test: the optimization must not be applied when the shift has |
| ; multiple users. Here %shl feeds both the compare and the call to @use, so |
| ; the checks expect the full shifted value to be computed (shldq + shlq on |
| ; x86_64, three shldl + shll on i686) and the compare to OR those shifted |
| ; parts together. |
| define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind { |
| ; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %ebx |
| ; X86-NEXT: pushl %edi |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: shldl $17, %esi, %edx |
| ; X86-NEXT: shldl $17, %ecx, %esi |
| ; X86-NEXT: shldl $17, %eax, %ecx |
| ; X86-NEXT: shll $17, %eax |
| ; X86-NEXT: movl %ecx, %edi |
| ; X86-NEXT: orl %edx, %edi |
| ; X86-NEXT: movl %eax, %ebx |
| ; X86-NEXT: orl %esi, %ebx |
| ; X86-NEXT: orl %edi, %ebx |
| ; X86-NEXT: sete %bl |
| ; X86-NEXT: pushl %edx |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: pushl %ecx |
| ; X86-NEXT: pushl %eax |
| ; X86-NEXT: calll use@PLT |
| ; X86-NEXT: addl $16, %esp |
| ; X86-NEXT: movl %ebx, %eax |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: popl %edi |
| ; X86-NEXT: popl %ebx |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users: |
| ; X64: # %bb.0: |
| ; X64-NEXT: pushq %rbx |
| ; X64-NEXT: shldq $17, %rdi, %rsi |
| ; X64-NEXT: shlq $17, %rdi |
| ; X64-NEXT: movq %rdi, %rax |
| ; X64-NEXT: orq %rsi, %rax |
| ; X64-NEXT: sete %bl |
| ; X64-NEXT: callq use@PLT |
| ; X64-NEXT: movl %ebx, %eax |
| ; X64-NEXT: popq %rbx |
| ; X64-NEXT: retq |
| %shl = shl i128 %a, 17 |
| %cmp = icmp eq i128 %shl, 0 |
| ; Second user of %shl: this is what makes the test negative. |
| call void @use(i128 %shl) |
| ret i1 %cmp |
| } |
| |
| ; Check that the optimization is applied to a DAG that has the appropriate |
| ; shape even when it did not come from expanding a wider shift. |
| ; The IR below spells out that shape by hand on i64 values: |
| ; (%a << 17) | (%b >> 47) | (%b << 17), with 17 + 47 == 64. |
| ; The x86_64 checks expect a single shlq by 17, an orq with the other |
| ; argument, and sete; no shldq appears. |
| define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind { |
| ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts: |
| ; X86: # %bb.0: |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: orl %eax, %edx |
| ; X86-NEXT: shldl $17, %ecx, %edx |
| ; X86-NEXT: shldl $17, %eax, %ecx |
| ; X86-NEXT: orl %edx, %ecx |
| ; X86-NEXT: sete %al |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts: |
| ; X64: # %bb.0: |
| ; X64-NEXT: shlq $17, %rdi |
| ; X64-NEXT: orq %rsi, %rdi |
| ; X64-NEXT: sete %al |
| ; X64-NEXT: retq |
| %shl.a = shl i64 %a, 17 |
| %srl.b = lshr i64 %b, 47 |
| %or.0 = or i64 %shl.a, %srl.b |
| %shl.b = shl i64 %b, 17 |
| %or.1 = or i64 %or.0, %shl.b |
| %cmp = icmp eq i64 %or.1, 0 |
| ret i1 %cmp |
| } |
| |
| ; Negative test: the optimization must not be applied because the constants |
| ; used in the shifts do not match. The IR is the same as in |
| ; opt_setcc_expanded_shl_correct_shifts except that %b is shifted left by 18 |
| ; instead of 17, so the checks expect the shifts to be kept (shldq by 17 and |
| ; shlq by 18 on x86_64). |
| define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind { |
| ; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts: |
| ; X86: # %bb.0: |
| ; X86-NEXT: pushl %esi |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X86-NEXT: shldl $17, %edx, %esi |
| ; X86-NEXT: shldl $17, %ecx, %edx |
| ; X86-NEXT: shldl $18, %eax, %ecx |
| ; X86-NEXT: shll $18, %eax |
| ; X86-NEXT: orl %edx, %eax |
| ; X86-NEXT: orl %esi, %ecx |
| ; X86-NEXT: orl %eax, %ecx |
| ; X86-NEXT: sete %al |
| ; X86-NEXT: popl %esi |
| ; X86-NEXT: retl |
| ; |
| ; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts: |
| ; X64: # %bb.0: |
| ; X64-NEXT: shldq $17, %rsi, %rdi |
| ; X64-NEXT: shlq $18, %rsi |
| ; X64-NEXT: orq %rdi, %rsi |
| ; X64-NEXT: sete %al |
| ; X64-NEXT: retq |
| %shl.a = shl i64 %a, 17 |
| %srl.b = lshr i64 %b, 47 |
| %or.0 = or i64 %shl.a, %srl.b |
| ; Mismatching amount: 18 here versus 17 for %shl.a above. |
| %shl.b = shl i64 %b, 18 |
| %or.1 = or i64 %or.0, %shl.b |
| %cmp = icmp eq i64 %or.1, 0 |
| ret i1 %cmp |
| } |
| |
| ; External function taking an i128; called by |
| ; opt_setcc_shl_eq_zero_multiple_shl_users to give the shift a second user. |
| declare void @use(i128 %a) |