| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=ALL,X64 |
| ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=ALL,X32 |
| |
; These test cases are inspired by C++20 std::midpoint().
; See https://bugs.llvm.org/show_bug.cgi?id=40965
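;
; For reference, each test below exercises the following overflow-safe
; midpoint pattern, sketched here in C++ for the signed i32 case (an
; illustrative approximation, not the exact libc++ implementation; the
; unsigned tests use the analogous unsigned comparison):
;
;   int midpoint(int a, int b) {
;     // Difference between the larger and the smaller value; computing it
;     // in unsigned arithmetic cannot overflow.
;     unsigned diff = a > b ? unsigned(a) - unsigned(b)
;                           : unsigned(b) - unsigned(a);
;     // Step from 'a' halfway toward 'b', in the appropriate direction.
;     int sign = a > b ? -1 : 1;
;     return a + sign * int(diff / 2);
;   }
;
; The IR mirrors this with an icmp plus three selects (sign, min, max),
; a sub, a lshr by 1, a mul by the sign, and a final add.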
| |
| ; ---------------------------------------------------------------------------- ; |
| ; 32-bit width |
| ; ---------------------------------------------------------------------------- ; |
| |
; Values come from registers.
| |
| define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { |
| ; X64-LABEL: scalar_i32_signed_reg_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpl %esi, %edi |
| ; X64-NEXT: setle %al |
| ; X64-NEXT: leal -1(%rax,%rax), %eax |
| ; X64-NEXT: movl %edi, %ecx |
| ; X64-NEXT: cmovgl %esi, %ecx |
| ; X64-NEXT: cmovgel %edi, %esi |
| ; X64-NEXT: subl %ecx, %esi |
| ; X64-NEXT: shrl %esi |
| ; X64-NEXT: imull %esi, %eax |
| ; X64-NEXT: addl %edi, %eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i32_signed_reg_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: cmpl %edx, %ecx |
| ; X32-NEXT: setle %al |
| ; X32-NEXT: movl %edx, %esi |
| ; X32-NEXT: jg .LBB0_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: .LBB0_2: |
| ; X32-NEXT: leal -1(%eax,%eax), %edi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: jge .LBB0_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %edx, %eax |
| ; X32-NEXT: .LBB0_4: |
| ; X32-NEXT: subl %esi, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edi, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| %t3 = icmp sgt i32 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i32 -1, i32 1 |
| %t5 = select i1 %t3, i32 %a2, i32 %a1 |
| %t6 = select i1 %t3, i32 %a1, i32 %a2 |
| %t7 = sub i32 %t6, %t5 |
| %t8 = lshr i32 %t7, 1 |
| %t9 = mul nsw i32 %t8, %t4 ; signed |
| %a10 = add nsw i32 %t9, %a1 ; signed |
| ret i32 %a10 |
| } |
| |
| define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind { |
| ; X64-LABEL: scalar_i32_unsigned_reg_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpl %esi, %edi |
| ; X64-NEXT: setbe %al |
| ; X64-NEXT: leal -1(%rax,%rax), %eax |
| ; X64-NEXT: movl %edi, %ecx |
| ; X64-NEXT: cmoval %esi, %ecx |
| ; X64-NEXT: cmoval %edi, %esi |
| ; X64-NEXT: subl %ecx, %esi |
| ; X64-NEXT: shrl %esi |
| ; X64-NEXT: imull %esi, %eax |
| ; X64-NEXT: addl %edi, %eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i32_unsigned_reg_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: xorl %edx, %edx |
| ; X32-NEXT: cmpl %eax, %ecx |
| ; X32-NEXT: setbe %dl |
| ; X32-NEXT: leal -1(%edx,%edx), %edx |
| ; X32-NEXT: ja .LBB1_1 |
| ; X32-NEXT: # %bb.2: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: jmp .LBB1_3 |
| ; X32-NEXT: .LBB1_1: |
| ; X32-NEXT: movl %eax, %esi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: .LBB1_3: |
| ; X32-NEXT: subl %esi, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edx, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: retl |
| %t3 = icmp ugt i32 %a1, %a2 |
| %t4 = select i1 %t3, i32 -1, i32 1 |
| %t5 = select i1 %t3, i32 %a2, i32 %a1 |
| %t6 = select i1 %t3, i32 %a1, i32 %a2 |
| %t7 = sub i32 %t6, %t5 |
| %t8 = lshr i32 %t7, 1 |
| %t9 = mul i32 %t8, %t4 |
| %a10 = add i32 %t9, %a1 |
| ret i32 %a10 |
| } |
| |
; Values are loaded. Only the signed case is checked.
| |
| define i32 @scalar_i32_signed_mem_reg(i32* %a1_addr, i32 %a2) nounwind { |
| ; X64-LABEL: scalar_i32_signed_mem_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %ecx |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpl %esi, %ecx |
| ; X64-NEXT: setle %al |
| ; X64-NEXT: leal -1(%rax,%rax), %eax |
| ; X64-NEXT: movl %ecx, %edx |
| ; X64-NEXT: cmovgl %esi, %edx |
| ; X64-NEXT: cmovgel %ecx, %esi |
| ; X64-NEXT: subl %edx, %esi |
| ; X64-NEXT: shrl %esi |
| ; X64-NEXT: imull %esi, %eax |
| ; X64-NEXT: addl %ecx, %eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i32_signed_mem_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl (%eax), %ecx |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: cmpl %edx, %ecx |
| ; X32-NEXT: setle %al |
| ; X32-NEXT: movl %edx, %esi |
| ; X32-NEXT: jg .LBB2_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: .LBB2_2: |
| ; X32-NEXT: leal -1(%eax,%eax), %edi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: jge .LBB2_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %edx, %eax |
| ; X32-NEXT: .LBB2_4: |
| ; X32-NEXT: subl %esi, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edi, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| %a1 = load i32, i32* %a1_addr |
| %t3 = icmp sgt i32 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i32 -1, i32 1 |
| %t5 = select i1 %t3, i32 %a2, i32 %a1 |
| %t6 = select i1 %t3, i32 %a1, i32 %a2 |
| %t7 = sub i32 %t6, %t5 |
| %t8 = lshr i32 %t7, 1 |
| %t9 = mul nsw i32 %t8, %t4 ; signed |
| %a10 = add nsw i32 %t9, %a1 ; signed |
| ret i32 %a10 |
| } |
| |
| define i32 @scalar_i32_signed_reg_mem(i32 %a1, i32* %a2_addr) nounwind { |
| ; X64-LABEL: scalar_i32_signed_reg_mem: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rsi), %eax |
| ; X64-NEXT: xorl %ecx, %ecx |
| ; X64-NEXT: cmpl %eax, %edi |
| ; X64-NEXT: setle %cl |
| ; X64-NEXT: leal -1(%rcx,%rcx), %ecx |
| ; X64-NEXT: movl %edi, %edx |
| ; X64-NEXT: cmovgl %eax, %edx |
| ; X64-NEXT: cmovgel %edi, %eax |
| ; X64-NEXT: subl %edx, %eax |
| ; X64-NEXT: shrl %eax |
| ; X64-NEXT: imull %ecx, %eax |
| ; X64-NEXT: addl %edi, %eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i32_signed_reg_mem: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl (%eax), %edx |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: cmpl %edx, %ecx |
| ; X32-NEXT: setle %al |
| ; X32-NEXT: movl %edx, %esi |
| ; X32-NEXT: jg .LBB3_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: .LBB3_2: |
| ; X32-NEXT: leal -1(%eax,%eax), %edi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: jge .LBB3_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %edx, %eax |
| ; X32-NEXT: .LBB3_4: |
| ; X32-NEXT: subl %esi, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edi, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| %a2 = load i32, i32* %a2_addr |
| %t3 = icmp sgt i32 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i32 -1, i32 1 |
| %t5 = select i1 %t3, i32 %a2, i32 %a1 |
| %t6 = select i1 %t3, i32 %a1, i32 %a2 |
| %t7 = sub i32 %t6, %t5 |
| %t8 = lshr i32 %t7, 1 |
| %t9 = mul nsw i32 %t8, %t4 ; signed |
| %a10 = add nsw i32 %t9, %a1 ; signed |
| ret i32 %a10 |
| } |
| |
| define i32 @scalar_i32_signed_mem_mem(i32* %a1_addr, i32* %a2_addr) nounwind { |
| ; X64-LABEL: scalar_i32_signed_mem_mem: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl (%rdi), %ecx |
| ; X64-NEXT: movl (%rsi), %eax |
| ; X64-NEXT: xorl %edx, %edx |
| ; X64-NEXT: cmpl %eax, %ecx |
| ; X64-NEXT: setle %dl |
| ; X64-NEXT: leal -1(%rdx,%rdx), %edx |
| ; X64-NEXT: movl %ecx, %esi |
| ; X64-NEXT: cmovgl %eax, %esi |
| ; X64-NEXT: cmovgel %ecx, %eax |
| ; X64-NEXT: subl %esi, %eax |
| ; X64-NEXT: shrl %eax |
| ; X64-NEXT: imull %edx, %eax |
| ; X64-NEXT: addl %ecx, %eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i32_signed_mem_mem: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl (%ecx), %ecx |
| ; X32-NEXT: movl (%eax), %edx |
| ; X32-NEXT: xorl %eax, %eax |
| ; X32-NEXT: cmpl %edx, %ecx |
| ; X32-NEXT: setle %al |
| ; X32-NEXT: movl %edx, %esi |
| ; X32-NEXT: jg .LBB4_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: .LBB4_2: |
| ; X32-NEXT: leal -1(%eax,%eax), %edi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: jge .LBB4_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %edx, %eax |
| ; X32-NEXT: .LBB4_4: |
| ; X32-NEXT: subl %esi, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edi, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| %a1 = load i32, i32* %a1_addr |
| %a2 = load i32, i32* %a2_addr |
| %t3 = icmp sgt i32 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i32 -1, i32 1 |
| %t5 = select i1 %t3, i32 %a2, i32 %a1 |
| %t6 = select i1 %t3, i32 %a1, i32 %a2 |
| %t7 = sub i32 %t6, %t5 |
| %t8 = lshr i32 %t7, 1 |
| %t9 = mul nsw i32 %t8, %t4 ; signed |
| %a10 = add nsw i32 %t9, %a1 ; signed |
| ret i32 %a10 |
| } |
| |
| ; ---------------------------------------------------------------------------- ; |
| ; 64-bit width |
| ; ---------------------------------------------------------------------------- ; |
| |
; Values come from registers.
| |
| define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { |
| ; X64-LABEL: scalar_i64_signed_reg_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpq %rsi, %rdi |
| ; X64-NEXT: setle %al |
| ; X64-NEXT: leaq -1(%rax,%rax), %rax |
| ; X64-NEXT: movq %rdi, %rcx |
| ; X64-NEXT: cmovgq %rsi, %rcx |
| ; X64-NEXT: cmovgeq %rdi, %rsi |
| ; X64-NEXT: subq %rcx, %rsi |
| ; X64-NEXT: shrq %rsi |
| ; X64-NEXT: imulq %rsi, %rax |
| ; X64-NEXT: addq %rdi, %rax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i64_signed_reg_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %ebp |
| ; X32-NEXT: pushl %ebx |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: pushl %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp |
| ; X32-NEXT: cmpl %esi, %edx |
| ; X32-NEXT: movl %ebp, %eax |
| ; X32-NEXT: sbbl %ecx, %eax |
| ; X32-NEXT: movl %edx, %eax |
| ; X32-NEXT: movl $-1, %edi |
| ; X32-NEXT: movl $-1, %ebx |
| ; X32-NEXT: jl .LBB5_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: xorl %ebx, %ebx |
| ; X32-NEXT: movl $1, %edi |
| ; X32-NEXT: movl %ecx, %ebp |
| ; X32-NEXT: movl %esi, %edx |
| ; X32-NEXT: .LBB5_2: |
| ; X32-NEXT: movl %edi, (%esp) # 4-byte Spill |
| ; X32-NEXT: cmpl %eax, %esi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X32-NEXT: sbbl %edi, %eax |
| ; X32-NEXT: movl %esi, %eax |
| ; X32-NEXT: jge .LBB5_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %edi, %ecx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: .LBB5_4: |
| ; X32-NEXT: subl %edx, %eax |
| ; X32-NEXT: sbbl %ebp, %ecx |
| ; X32-NEXT: shrdl $1, %ecx, %eax |
| ; X32-NEXT: imull %eax, %ebx |
| ; X32-NEXT: movl (%esp), %esi # 4-byte Reload |
| ; X32-NEXT: mull %esi |
| ; X32-NEXT: addl %ebx, %edx |
| ; X32-NEXT: shrl %ecx |
| ; X32-NEXT: imull %esi, %ecx |
| ; X32-NEXT: addl %ecx, %edx |
| ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: addl $4, %esp |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: popl %ebx |
| ; X32-NEXT: popl %ebp |
| ; X32-NEXT: retl |
| %t3 = icmp sgt i64 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i64 -1, i64 1 |
| %t5 = select i1 %t3, i64 %a2, i64 %a1 |
| %t6 = select i1 %t3, i64 %a1, i64 %a2 |
| %t7 = sub i64 %t6, %t5 |
| %t8 = lshr i64 %t7, 1 |
| %t9 = mul nsw i64 %t8, %t4 ; signed |
| %a10 = add nsw i64 %t9, %a1 ; signed |
| ret i64 %a10 |
| } |
| |
| define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { |
| ; X64-LABEL: scalar_i64_unsigned_reg_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpq %rsi, %rdi |
| ; X64-NEXT: setbe %al |
| ; X64-NEXT: leaq -1(%rax,%rax), %rax |
| ; X64-NEXT: movq %rdi, %rcx |
| ; X64-NEXT: cmovaq %rsi, %rcx |
| ; X64-NEXT: cmovaq %rdi, %rsi |
| ; X64-NEXT: subq %rcx, %rsi |
| ; X64-NEXT: shrq %rsi |
| ; X64-NEXT: imulq %rsi, %rax |
| ; X64-NEXT: addq %rdi, %rax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i64_unsigned_reg_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %ebp |
| ; X32-NEXT: pushl %ebx |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X32-NEXT: cmpl %ecx, %eax |
| ; X32-NEXT: movl %edi, %edx |
| ; X32-NEXT: sbbl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: movl $-1, %ebx |
| ; X32-NEXT: jb .LBB6_1 |
| ; X32-NEXT: # %bb.2: |
| ; X32-NEXT: xorl %ebp, %ebp |
| ; X32-NEXT: movl $1, %ebx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: jmp .LBB6_3 |
| ; X32-NEXT: .LBB6_1: |
| ; X32-NEXT: movl $-1, %ebp |
| ; X32-NEXT: movl %edi, %edx |
| ; X32-NEXT: movl %eax, %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: .LBB6_3: |
| ; X32-NEXT: subl %esi, %eax |
| ; X32-NEXT: sbbl %edx, %edi |
| ; X32-NEXT: shrdl $1, %edi, %eax |
| ; X32-NEXT: imull %eax, %ebp |
| ; X32-NEXT: mull %ebx |
| ; X32-NEXT: addl %ebp, %edx |
| ; X32-NEXT: shrl %edi |
| ; X32-NEXT: imull %ebx, %edi |
| ; X32-NEXT: addl %edi, %edx |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: popl %ebx |
| ; X32-NEXT: popl %ebp |
| ; X32-NEXT: retl |
| %t3 = icmp ugt i64 %a1, %a2 |
| %t4 = select i1 %t3, i64 -1, i64 1 |
| %t5 = select i1 %t3, i64 %a2, i64 %a1 |
| %t6 = select i1 %t3, i64 %a1, i64 %a2 |
| %t7 = sub i64 %t6, %t5 |
| %t8 = lshr i64 %t7, 1 |
| %t9 = mul i64 %t8, %t4 |
| %a10 = add i64 %t9, %a1 |
| ret i64 %a10 |
| } |
| |
; Values are loaded. Only the signed case is checked.
| |
| define i64 @scalar_i64_signed_mem_reg(i64* %a1_addr, i64 %a2) nounwind { |
| ; X64-LABEL: scalar_i64_signed_mem_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movq (%rdi), %rcx |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpq %rsi, %rcx |
| ; X64-NEXT: setle %al |
| ; X64-NEXT: leaq -1(%rax,%rax), %rax |
| ; X64-NEXT: movq %rcx, %rdx |
| ; X64-NEXT: cmovgq %rsi, %rdx |
| ; X64-NEXT: cmovgeq %rcx, %rsi |
| ; X64-NEXT: subq %rdx, %rsi |
| ; X64-NEXT: shrq %rsi |
| ; X64-NEXT: imulq %rsi, %rax |
| ; X64-NEXT: addq %rcx, %rax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i64_signed_mem_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %ebp |
| ; X32-NEXT: pushl %ebx |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: pushl %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl (%eax), %esi |
| ; X32-NEXT: movl 4(%eax), %ebp |
| ; X32-NEXT: cmpl %esi, %ecx |
| ; X32-NEXT: movl %edx, %eax |
| ; X32-NEXT: sbbl %ebp, %eax |
| ; X32-NEXT: movl $-1, %eax |
| ; X32-NEXT: movl $-1, %ebx |
| ; X32-NEXT: movl %ecx, %edi |
| ; X32-NEXT: jl .LBB7_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: xorl %ebx, %ebx |
| ; X32-NEXT: movl $1, %eax |
| ; X32-NEXT: movl %ebp, %edx |
| ; X32-NEXT: movl %esi, %edi |
| ; X32-NEXT: .LBB7_2: |
| ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill |
| ; X32-NEXT: cmpl %ecx, %esi |
| ; X32-NEXT: movl %ebp, %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: sbbl %ecx, %eax |
| ; X32-NEXT: movl %ebp, %ecx |
| ; X32-NEXT: movl %esi, %eax |
| ; X32-NEXT: jge .LBB7_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: .LBB7_4: |
| ; X32-NEXT: subl %edi, %eax |
| ; X32-NEXT: sbbl %edx, %ecx |
| ; X32-NEXT: shrdl $1, %ecx, %eax |
| ; X32-NEXT: imull %eax, %ebx |
| ; X32-NEXT: movl (%esp), %edi # 4-byte Reload |
| ; X32-NEXT: mull %edi |
| ; X32-NEXT: addl %ebx, %edx |
| ; X32-NEXT: shrl %ecx |
| ; X32-NEXT: imull %edi, %ecx |
| ; X32-NEXT: addl %ecx, %edx |
| ; X32-NEXT: addl %esi, %eax |
| ; X32-NEXT: adcl %ebp, %edx |
| ; X32-NEXT: addl $4, %esp |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: popl %ebx |
| ; X32-NEXT: popl %ebp |
| ; X32-NEXT: retl |
| %a1 = load i64, i64* %a1_addr |
| %t3 = icmp sgt i64 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i64 -1, i64 1 |
| %t5 = select i1 %t3, i64 %a2, i64 %a1 |
| %t6 = select i1 %t3, i64 %a1, i64 %a2 |
| %t7 = sub i64 %t6, %t5 |
| %t8 = lshr i64 %t7, 1 |
| %t9 = mul nsw i64 %t8, %t4 ; signed |
| %a10 = add nsw i64 %t9, %a1 ; signed |
| ret i64 %a10 |
| } |
| |
| define i64 @scalar_i64_signed_reg_mem(i64 %a1, i64* %a2_addr) nounwind { |
| ; X64-LABEL: scalar_i64_signed_reg_mem: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movq (%rsi), %rax |
| ; X64-NEXT: xorl %ecx, %ecx |
| ; X64-NEXT: cmpq %rax, %rdi |
| ; X64-NEXT: setle %cl |
| ; X64-NEXT: leaq -1(%rcx,%rcx), %rcx |
| ; X64-NEXT: movq %rdi, %rdx |
| ; X64-NEXT: cmovgq %rax, %rdx |
| ; X64-NEXT: cmovgeq %rdi, %rax |
| ; X64-NEXT: subq %rdx, %rax |
| ; X64-NEXT: shrq %rax |
| ; X64-NEXT: imulq %rcx, %rax |
| ; X64-NEXT: addq %rdi, %rax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i64_signed_reg_mem: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %ebp |
| ; X32-NEXT: pushl %ebx |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: subl $8, %esp |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl (%eax), %edx |
| ; X32-NEXT: movl 4(%eax), %ebp |
| ; X32-NEXT: cmpl %esi, %edx |
| ; X32-NEXT: movl %ebp, %eax |
| ; X32-NEXT: sbbl %ecx, %eax |
| ; X32-NEXT: movl $-1, %eax |
| ; X32-NEXT: movl $-1, %ebx |
| ; X32-NEXT: movl %ebp, (%esp) # 4-byte Spill |
| ; X32-NEXT: movl %edx, %edi |
| ; X32-NEXT: jl .LBB8_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: xorl %ebx, %ebx |
| ; X32-NEXT: movl $1, %eax |
| ; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill |
| ; X32-NEXT: movl %esi, %edi |
| ; X32-NEXT: .LBB8_2: |
| ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X32-NEXT: cmpl %edx, %esi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: sbbl %ebp, %eax |
| ; X32-NEXT: jge .LBB8_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %ebp, %ecx |
| ; X32-NEXT: movl %edx, %esi |
| ; X32-NEXT: .LBB8_4: |
| ; X32-NEXT: subl %edi, %esi |
| ; X32-NEXT: sbbl (%esp), %ecx # 4-byte Folded Reload |
| ; X32-NEXT: shrdl $1, %ecx, %esi |
| ; X32-NEXT: imull %esi, %ebx |
| ; X32-NEXT: movl %esi, %eax |
| ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload |
| ; X32-NEXT: mull %esi |
| ; X32-NEXT: addl %ebx, %edx |
| ; X32-NEXT: shrl %ecx |
| ; X32-NEXT: imull %esi, %ecx |
| ; X32-NEXT: addl %ecx, %edx |
| ; X32-NEXT: addl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: adcl {{[0-9]+}}(%esp), %edx |
| ; X32-NEXT: addl $8, %esp |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: popl %ebx |
| ; X32-NEXT: popl %ebp |
| ; X32-NEXT: retl |
| %a2 = load i64, i64* %a2_addr |
| %t3 = icmp sgt i64 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i64 -1, i64 1 |
| %t5 = select i1 %t3, i64 %a2, i64 %a1 |
| %t6 = select i1 %t3, i64 %a1, i64 %a2 |
| %t7 = sub i64 %t6, %t5 |
| %t8 = lshr i64 %t7, 1 |
| %t9 = mul nsw i64 %t8, %t4 ; signed |
| %a10 = add nsw i64 %t9, %a1 ; signed |
| ret i64 %a10 |
| } |
| |
| define i64 @scalar_i64_signed_mem_mem(i64* %a1_addr, i64* %a2_addr) nounwind { |
| ; X64-LABEL: scalar_i64_signed_mem_mem: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movq (%rdi), %rcx |
| ; X64-NEXT: movq (%rsi), %rax |
| ; X64-NEXT: xorl %edx, %edx |
| ; X64-NEXT: cmpq %rax, %rcx |
| ; X64-NEXT: setle %dl |
| ; X64-NEXT: leaq -1(%rdx,%rdx), %rdx |
| ; X64-NEXT: movq %rcx, %rsi |
| ; X64-NEXT: cmovgq %rax, %rsi |
| ; X64-NEXT: cmovgeq %rcx, %rax |
| ; X64-NEXT: subq %rsi, %rax |
| ; X64-NEXT: shrq %rax |
| ; X64-NEXT: imulq %rdx, %rax |
| ; X64-NEXT: addq %rcx, %rax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i64_signed_mem_mem: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %ebp |
| ; X32-NEXT: pushl %ebx |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: subl $12, %esp |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl (%ecx), %esi |
| ; X32-NEXT: movl 4(%ecx), %edi |
| ; X32-NEXT: movl (%eax), %edx |
| ; X32-NEXT: movl 4(%eax), %ebp |
| ; X32-NEXT: cmpl %esi, %edx |
| ; X32-NEXT: movl %ebp, %eax |
| ; X32-NEXT: sbbl %edi, %eax |
| ; X32-NEXT: movl $-1, %eax |
| ; X32-NEXT: movl $-1, %ebx |
| ; X32-NEXT: movl %ebp, %ecx |
| ; X32-NEXT: movl %edx, (%esp) # 4-byte Spill |
| ; X32-NEXT: jl .LBB9_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: xorl %ebx, %ebx |
| ; X32-NEXT: movl $1, %eax |
| ; X32-NEXT: movl %edi, %ecx |
| ; X32-NEXT: movl %esi, (%esp) # 4-byte Spill |
| ; X32-NEXT: .LBB9_2: |
| ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill |
| ; X32-NEXT: cmpl %edx, %esi |
| ; X32-NEXT: movl %edi, %eax |
| ; X32-NEXT: sbbl %ebp, %eax |
| ; X32-NEXT: movl %edi, %ecx |
| ; X32-NEXT: movl %esi, %eax |
| ; X32-NEXT: jge .LBB9_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %ebp, %ecx |
| ; X32-NEXT: movl %edx, %eax |
| ; X32-NEXT: .LBB9_4: |
| ; X32-NEXT: subl (%esp), %eax # 4-byte Folded Reload |
| ; X32-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload |
| ; X32-NEXT: shrdl $1, %ecx, %eax |
| ; X32-NEXT: imull %eax, %ebx |
| ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload |
| ; X32-NEXT: mull %ebp |
| ; X32-NEXT: addl %ebx, %edx |
| ; X32-NEXT: shrl %ecx |
| ; X32-NEXT: imull %ebp, %ecx |
| ; X32-NEXT: addl %ecx, %edx |
| ; X32-NEXT: addl %esi, %eax |
| ; X32-NEXT: adcl %edi, %edx |
| ; X32-NEXT: addl $12, %esp |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: popl %ebx |
| ; X32-NEXT: popl %ebp |
| ; X32-NEXT: retl |
| %a1 = load i64, i64* %a1_addr |
| %a2 = load i64, i64* %a2_addr |
| %t3 = icmp sgt i64 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i64 -1, i64 1 |
| %t5 = select i1 %t3, i64 %a2, i64 %a1 |
| %t6 = select i1 %t3, i64 %a1, i64 %a2 |
| %t7 = sub i64 %t6, %t5 |
| %t8 = lshr i64 %t7, 1 |
| %t9 = mul nsw i64 %t8, %t4 ; signed |
| %a10 = add nsw i64 %t9, %a1 ; signed |
| ret i64 %a10 |
| } |
| |
| ; ---------------------------------------------------------------------------- ; |
| ; 16-bit width |
| ; ---------------------------------------------------------------------------- ; |
| |
; Values come from registers.
| |
| define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { |
| ; X64-LABEL: scalar_i16_signed_reg_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpw %si, %di |
| ; X64-NEXT: setle %al |
| ; X64-NEXT: leal -1(%rax,%rax), %ecx |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: cmovgl %esi, %eax |
| ; X64-NEXT: cmovgel %edi, %esi |
| ; X64-NEXT: subl %eax, %esi |
| ; X64-NEXT: movzwl %si, %eax |
| ; X64-NEXT: shrl %eax |
| ; X64-NEXT: imull %ecx, %eax |
| ; X64-NEXT: addl %edi, %eax |
| ; X64-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i16_signed_reg_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: xorl %edx, %edx |
| ; X32-NEXT: cmpw %ax, %cx |
| ; X32-NEXT: setle %dl |
| ; X32-NEXT: movl %eax, %esi |
| ; X32-NEXT: jg .LBB10_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: .LBB10_2: |
| ; X32-NEXT: leal -1(%edx,%edx), %edx |
| ; X32-NEXT: movl %ecx, %edi |
| ; X32-NEXT: jge .LBB10_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %eax, %edi |
| ; X32-NEXT: .LBB10_4: |
| ; X32-NEXT: subl %esi, %edi |
| ; X32-NEXT: movzwl %di, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edx, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| %t3 = icmp sgt i16 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i16 -1, i16 1 |
| %t5 = select i1 %t3, i16 %a2, i16 %a1 |
| %t6 = select i1 %t3, i16 %a1, i16 %a2 |
| %t7 = sub i16 %t6, %t5 |
| %t8 = lshr i16 %t7, 1 |
| %t9 = mul nsw i16 %t8, %t4 ; signed |
| %a10 = add nsw i16 %t9, %a1 ; signed |
| ret i16 %a10 |
| } |
| |
| define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { |
| ; X64-LABEL: scalar_i16_unsigned_reg_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpw %si, %di |
| ; X64-NEXT: setbe %al |
| ; X64-NEXT: leal -1(%rax,%rax), %ecx |
| ; X64-NEXT: movl %edi, %eax |
| ; X64-NEXT: cmoval %esi, %eax |
| ; X64-NEXT: cmoval %edi, %esi |
| ; X64-NEXT: subl %eax, %esi |
| ; X64-NEXT: movzwl %si, %eax |
| ; X64-NEXT: shrl %eax |
| ; X64-NEXT: imull %ecx, %eax |
| ; X64-NEXT: addl %edi, %eax |
| ; X64-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i16_unsigned_reg_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: xorl %edx, %edx |
| ; X32-NEXT: cmpw %ax, %cx |
| ; X32-NEXT: setbe %dl |
| ; X32-NEXT: leal -1(%edx,%edx), %edx |
| ; X32-NEXT: ja .LBB11_1 |
| ; X32-NEXT: # %bb.2: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: jmp .LBB11_3 |
| ; X32-NEXT: .LBB11_1: |
| ; X32-NEXT: movl %eax, %esi |
| ; X32-NEXT: movl %ecx, %eax |
| ; X32-NEXT: .LBB11_3: |
| ; X32-NEXT: subl %esi, %eax |
| ; X32-NEXT: movzwl %ax, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edx, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: retl |
| %t3 = icmp ugt i16 %a1, %a2 |
| %t4 = select i1 %t3, i16 -1, i16 1 |
| %t5 = select i1 %t3, i16 %a2, i16 %a1 |
| %t6 = select i1 %t3, i16 %a1, i16 %a2 |
| %t7 = sub i16 %t6, %t5 |
| %t8 = lshr i16 %t7, 1 |
| %t9 = mul i16 %t8, %t4 |
| %a10 = add i16 %t9, %a1 |
| ret i16 %a10 |
| } |
| |
; Values are loaded. Only the signed case is checked.
| |
| define i16 @scalar_i16_signed_mem_reg(i16* %a1_addr, i16 %a2) nounwind { |
| ; X64-LABEL: scalar_i16_signed_mem_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movzwl (%rdi), %ecx |
| ; X64-NEXT: xorl %eax, %eax |
| ; X64-NEXT: cmpw %si, %cx |
| ; X64-NEXT: setle %al |
| ; X64-NEXT: leal -1(%rax,%rax), %edx |
| ; X64-NEXT: movl %ecx, %eax |
| ; X64-NEXT: cmovgl %esi, %eax |
| ; X64-NEXT: cmovgel %ecx, %esi |
| ; X64-NEXT: subl %eax, %esi |
| ; X64-NEXT: movzwl %si, %eax |
| ; X64-NEXT: shrl %eax |
| ; X64-NEXT: imull %edx, %eax |
| ; X64-NEXT: addl %ecx, %eax |
| ; X64-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i16_signed_mem_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movzwl (%ecx), %ecx |
| ; X32-NEXT: xorl %edx, %edx |
| ; X32-NEXT: cmpw %ax, %cx |
| ; X32-NEXT: setle %dl |
| ; X32-NEXT: movl %eax, %esi |
| ; X32-NEXT: jg .LBB12_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: .LBB12_2: |
| ; X32-NEXT: leal -1(%edx,%edx), %edx |
| ; X32-NEXT: movl %ecx, %edi |
| ; X32-NEXT: jge .LBB12_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %eax, %edi |
| ; X32-NEXT: .LBB12_4: |
| ; X32-NEXT: subl %esi, %edi |
| ; X32-NEXT: movzwl %di, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edx, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| %a1 = load i16, i16* %a1_addr |
| %t3 = icmp sgt i16 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i16 -1, i16 1 |
| %t5 = select i1 %t3, i16 %a2, i16 %a1 |
| %t6 = select i1 %t3, i16 %a1, i16 %a2 |
| %t7 = sub i16 %t6, %t5 |
| %t8 = lshr i16 %t7, 1 |
| %t9 = mul nsw i16 %t8, %t4 ; signed |
| %a10 = add nsw i16 %t9, %a1 ; signed |
| ret i16 %a10 |
| } |
| |
| define i16 @scalar_i16_signed_reg_mem(i16 %a1, i16* %a2_addr) nounwind { |
| ; X64-LABEL: scalar_i16_signed_reg_mem: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movzwl (%rsi), %eax |
| ; X64-NEXT: xorl %ecx, %ecx |
| ; X64-NEXT: cmpw %ax, %di |
| ; X64-NEXT: setle %cl |
| ; X64-NEXT: leal -1(%rcx,%rcx), %ecx |
| ; X64-NEXT: movl %edi, %edx |
| ; X64-NEXT: cmovgl %eax, %edx |
| ; X64-NEXT: cmovgel %edi, %eax |
| ; X64-NEXT: subl %edx, %eax |
| ; X64-NEXT: movzwl %ax, %eax |
| ; X64-NEXT: shrl %eax |
| ; X64-NEXT: imull %ecx, %eax |
| ; X64-NEXT: addl %edi, %eax |
| ; X64-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i16_signed_reg_mem: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movzwl (%eax), %eax |
| ; X32-NEXT: xorl %edx, %edx |
| ; X32-NEXT: cmpw %ax, %cx |
| ; X32-NEXT: setle %dl |
| ; X32-NEXT: movl %eax, %esi |
| ; X32-NEXT: jg .LBB13_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: .LBB13_2: |
| ; X32-NEXT: leal -1(%edx,%edx), %edx |
| ; X32-NEXT: movl %ecx, %edi |
| ; X32-NEXT: jge .LBB13_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %eax, %edi |
| ; X32-NEXT: .LBB13_4: |
| ; X32-NEXT: subl %esi, %edi |
| ; X32-NEXT: movzwl %di, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edx, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| %a2 = load i16, i16* %a2_addr |
| %t3 = icmp sgt i16 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i16 -1, i16 1 |
| %t5 = select i1 %t3, i16 %a2, i16 %a1 |
| %t6 = select i1 %t3, i16 %a1, i16 %a2 |
| %t7 = sub i16 %t6, %t5 |
| %t8 = lshr i16 %t7, 1 |
| %t9 = mul nsw i16 %t8, %t4 ; signed |
| %a10 = add nsw i16 %t9, %a1 ; signed |
| ret i16 %a10 |
| } |
| |
| define i16 @scalar_i16_signed_mem_mem(i16* %a1_addr, i16* %a2_addr) nounwind { |
| ; X64-LABEL: scalar_i16_signed_mem_mem: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movzwl (%rdi), %ecx |
| ; X64-NEXT: movzwl (%rsi), %eax |
| ; X64-NEXT: xorl %edx, %edx |
| ; X64-NEXT: cmpw %ax, %cx |
| ; X64-NEXT: setle %dl |
| ; X64-NEXT: leal -1(%rdx,%rdx), %edx |
| ; X64-NEXT: movl %ecx, %esi |
| ; X64-NEXT: cmovgl %eax, %esi |
| ; X64-NEXT: cmovgel %ecx, %eax |
| ; X64-NEXT: subl %esi, %eax |
| ; X64-NEXT: movzwl %ax, %eax |
| ; X64-NEXT: shrl %eax |
| ; X64-NEXT: imull %edx, %eax |
| ; X64-NEXT: addl %ecx, %eax |
| ; X64-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i16_signed_mem_mem: |
| ; X32: # %bb.0: |
| ; X32-NEXT: pushl %edi |
| ; X32-NEXT: pushl %esi |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movzwl (%ecx), %ecx |
| ; X32-NEXT: movzwl (%eax), %eax |
| ; X32-NEXT: xorl %edx, %edx |
| ; X32-NEXT: cmpw %ax, %cx |
| ; X32-NEXT: setle %dl |
| ; X32-NEXT: movl %eax, %esi |
| ; X32-NEXT: jg .LBB14_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movl %ecx, %esi |
| ; X32-NEXT: .LBB14_2: |
| ; X32-NEXT: leal -1(%edx,%edx), %edx |
| ; X32-NEXT: movl %ecx, %edi |
| ; X32-NEXT: jge .LBB14_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movl %eax, %edi |
| ; X32-NEXT: .LBB14_4: |
| ; X32-NEXT: subl %esi, %edi |
| ; X32-NEXT: movzwl %di, %eax |
| ; X32-NEXT: shrl %eax |
| ; X32-NEXT: imull %edx, %eax |
| ; X32-NEXT: addl %ecx, %eax |
| ; X32-NEXT: # kill: def $ax killed $ax killed $eax |
| ; X32-NEXT: popl %esi |
| ; X32-NEXT: popl %edi |
| ; X32-NEXT: retl |
| %a1 = load i16, i16* %a1_addr |
| %a2 = load i16, i16* %a2_addr |
| %t3 = icmp sgt i16 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i16 -1, i16 1 |
| %t5 = select i1 %t3, i16 %a2, i16 %a1 |
| %t6 = select i1 %t3, i16 %a1, i16 %a2 |
| %t7 = sub i16 %t6, %t5 |
| %t8 = lshr i16 %t7, 1 |
| %t9 = mul nsw i16 %t8, %t4 ; signed |
| %a10 = add nsw i16 %t9, %a1 ; signed |
| ret i16 %a10 |
| } |
| |
| ; ---------------------------------------------------------------------------- ; |
| ; 8-bit width |
| ; ---------------------------------------------------------------------------- ; |
| |
; Values come from registers.
| |
| define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { |
| ; X64-LABEL: scalar_i8_signed_reg_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: cmpb %al, %dil |
| ; X64-NEXT: setle %cl |
| ; X64-NEXT: movl %edi, %edx |
| ; X64-NEXT: cmovgl %esi, %edx |
| ; X64-NEXT: cmovgel %edi, %eax |
| ; X64-NEXT: addb %cl, %cl |
| ; X64-NEXT: decb %cl |
| ; X64-NEXT: subb %dl, %al |
| ; X64-NEXT: shrb %al |
| ; X64-NEXT: # kill: def $al killed $al killed $eax |
| ; X64-NEXT: mulb %cl |
| ; X64-NEXT: addb %dil, %al |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i8_signed_reg_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: movb {{[0-9]+}}(%esp), %ah |
| ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl |
| ; X32-NEXT: cmpb %ah, %cl |
| ; X32-NEXT: setle %dl |
| ; X32-NEXT: movb %ah, %ch |
| ; X32-NEXT: jg .LBB15_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movb %cl, %ch |
| ; X32-NEXT: .LBB15_2: |
| ; X32-NEXT: movb %cl, %al |
| ; X32-NEXT: jge .LBB15_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movb %ah, %al |
| ; X32-NEXT: .LBB15_4: |
| ; X32-NEXT: subb %ch, %al |
| ; X32-NEXT: addb %dl, %dl |
| ; X32-NEXT: decb %dl |
| ; X32-NEXT: shrb %al |
| ; X32-NEXT: mulb %dl |
| ; X32-NEXT: addb %cl, %al |
| ; X32-NEXT: retl |
| %t3 = icmp sgt i8 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i8 -1, i8 1 |
| %t5 = select i1 %t3, i8 %a2, i8 %a1 |
| %t6 = select i1 %t3, i8 %a1, i8 %a2 |
| %t7 = sub i8 %t6, %t5 |
| %t8 = lshr i8 %t7, 1 |
| %t9 = mul nsw i8 %t8, %t4 ; signed |
| %a10 = add nsw i8 %t9, %a1 ; signed |
| ret i8 %a10 |
| } |
| |
| define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { |
| ; X64-LABEL: scalar_i8_unsigned_reg_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movl %esi, %eax |
| ; X64-NEXT: cmpb %al, %dil |
| ; X64-NEXT: setbe %cl |
| ; X64-NEXT: movl %edi, %edx |
| ; X64-NEXT: cmoval %esi, %edx |
| ; X64-NEXT: cmoval %edi, %eax |
| ; X64-NEXT: addb %cl, %cl |
| ; X64-NEXT: decb %cl |
| ; X64-NEXT: subb %dl, %al |
| ; X64-NEXT: shrb %al |
| ; X64-NEXT: # kill: def $al killed $al killed $eax |
| ; X64-NEXT: mulb %cl |
| ; X64-NEXT: addb %dil, %al |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i8_unsigned_reg_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: movb {{[0-9]+}}(%esp), %al |
| ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl |
| ; X32-NEXT: cmpb %al, %cl |
| ; X32-NEXT: setbe %dl |
| ; X32-NEXT: ja .LBB16_1 |
| ; X32-NEXT: # %bb.2: |
| ; X32-NEXT: movb %cl, %ah |
| ; X32-NEXT: jmp .LBB16_3 |
| ; X32-NEXT: .LBB16_1: |
| ; X32-NEXT: movb %al, %ah |
| ; X32-NEXT: movb %cl, %al |
| ; X32-NEXT: .LBB16_3: |
| ; X32-NEXT: subb %ah, %al |
| ; X32-NEXT: addb %dl, %dl |
| ; X32-NEXT: decb %dl |
| ; X32-NEXT: shrb %al |
| ; X32-NEXT: mulb %dl |
| ; X32-NEXT: addb %cl, %al |
| ; X32-NEXT: retl |
| %t3 = icmp ugt i8 %a1, %a2 |
| %t4 = select i1 %t3, i8 -1, i8 1 |
| %t5 = select i1 %t3, i8 %a2, i8 %a1 |
| %t6 = select i1 %t3, i8 %a1, i8 %a2 |
| %t7 = sub i8 %t6, %t5 |
| %t8 = lshr i8 %t7, 1 |
| %t9 = mul i8 %t8, %t4 |
| %a10 = add i8 %t9, %a1 |
| ret i8 %a10 |
| } |
| |
; Values are loaded. Only the signed case is checked.
| |
| define i8 @scalar_i8_signed_mem_reg(i8* %a1_addr, i8 %a2) nounwind { |
| ; X64-LABEL: scalar_i8_signed_mem_reg: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movzbl (%rdi), %ecx |
| ; X64-NEXT: cmpb %sil, %cl |
| ; X64-NEXT: setle %dl |
| ; X64-NEXT: movl %ecx, %edi |
| ; X64-NEXT: cmovgl %esi, %edi |
| ; X64-NEXT: movl %ecx, %eax |
| ; X64-NEXT: cmovll %esi, %eax |
| ; X64-NEXT: addb %dl, %dl |
| ; X64-NEXT: decb %dl |
| ; X64-NEXT: subb %dil, %al |
| ; X64-NEXT: shrb %al |
| ; X64-NEXT: # kill: def $al killed $al killed $eax |
| ; X64-NEXT: mulb %dl |
| ; X64-NEXT: addb %cl, %al |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i8_signed_mem_reg: |
| ; X32: # %bb.0: |
| ; X32-NEXT: movb {{[0-9]+}}(%esp), %ah |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movb (%ecx), %cl |
| ; X32-NEXT: cmpb %ah, %cl |
| ; X32-NEXT: setle %dl |
| ; X32-NEXT: movb %ah, %ch |
| ; X32-NEXT: jg .LBB17_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movb %cl, %ch |
| ; X32-NEXT: .LBB17_2: |
| ; X32-NEXT: movb %cl, %al |
| ; X32-NEXT: jge .LBB17_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movb %ah, %al |
| ; X32-NEXT: .LBB17_4: |
| ; X32-NEXT: subb %ch, %al |
| ; X32-NEXT: addb %dl, %dl |
| ; X32-NEXT: decb %dl |
| ; X32-NEXT: shrb %al |
| ; X32-NEXT: mulb %dl |
| ; X32-NEXT: addb %cl, %al |
| ; X32-NEXT: retl |
| %a1 = load i8, i8* %a1_addr |
| %t3 = icmp sgt i8 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i8 -1, i8 1 |
| %t5 = select i1 %t3, i8 %a2, i8 %a1 |
| %t6 = select i1 %t3, i8 %a1, i8 %a2 |
| %t7 = sub i8 %t6, %t5 |
| %t8 = lshr i8 %t7, 1 |
| %t9 = mul nsw i8 %t8, %t4 ; signed |
| %a10 = add nsw i8 %t9, %a1 ; signed |
| ret i8 %a10 |
| } |
| |
| define i8 @scalar_i8_signed_reg_mem(i8 %a1, i8* %a2_addr) nounwind { |
| ; X64-LABEL: scalar_i8_signed_reg_mem: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movzbl (%rsi), %eax |
| ; X64-NEXT: cmpb %al, %dil |
| ; X64-NEXT: setle %cl |
| ; X64-NEXT: movl %edi, %edx |
| ; X64-NEXT: cmovgl %eax, %edx |
| ; X64-NEXT: cmovgel %edi, %eax |
| ; X64-NEXT: addb %cl, %cl |
| ; X64-NEXT: decb %cl |
| ; X64-NEXT: subb %dl, %al |
| ; X64-NEXT: shrb %al |
| ; X64-NEXT: # kill: def $al killed $al killed $eax |
| ; X64-NEXT: mulb %cl |
| ; X64-NEXT: addb %dil, %al |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i8_signed_reg_mem: |
| ; X32: # %bb.0: |
| ; X32-NEXT: movb {{[0-9]+}}(%esp), %cl |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movb (%eax), %ah |
| ; X32-NEXT: cmpb %ah, %cl |
| ; X32-NEXT: setle %dl |
| ; X32-NEXT: movb %ah, %ch |
| ; X32-NEXT: jg .LBB18_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movb %cl, %ch |
| ; X32-NEXT: .LBB18_2: |
| ; X32-NEXT: movb %cl, %al |
| ; X32-NEXT: jge .LBB18_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movb %ah, %al |
| ; X32-NEXT: .LBB18_4: |
| ; X32-NEXT: subb %ch, %al |
| ; X32-NEXT: addb %dl, %dl |
| ; X32-NEXT: decb %dl |
| ; X32-NEXT: shrb %al |
| ; X32-NEXT: mulb %dl |
| ; X32-NEXT: addb %cl, %al |
| ; X32-NEXT: retl |
| %a2 = load i8, i8* %a2_addr |
| %t3 = icmp sgt i8 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i8 -1, i8 1 |
| %t5 = select i1 %t3, i8 %a2, i8 %a1 |
| %t6 = select i1 %t3, i8 %a1, i8 %a2 |
| %t7 = sub i8 %t6, %t5 |
| %t8 = lshr i8 %t7, 1 |
| %t9 = mul nsw i8 %t8, %t4 ; signed |
| %a10 = add nsw i8 %t9, %a1 ; signed |
| ret i8 %a10 |
| } |
| |
| define i8 @scalar_i8_signed_mem_mem(i8* %a1_addr, i8* %a2_addr) nounwind { |
| ; X64-LABEL: scalar_i8_signed_mem_mem: |
| ; X64: # %bb.0: |
| ; X64-NEXT: movzbl (%rdi), %ecx |
| ; X64-NEXT: movzbl (%rsi), %eax |
| ; X64-NEXT: cmpb %al, %cl |
| ; X64-NEXT: setle %dl |
| ; X64-NEXT: movl %ecx, %esi |
| ; X64-NEXT: cmovgl %eax, %esi |
| ; X64-NEXT: cmovgel %ecx, %eax |
| ; X64-NEXT: addb %dl, %dl |
| ; X64-NEXT: decb %dl |
| ; X64-NEXT: subb %sil, %al |
| ; X64-NEXT: shrb %al |
| ; X64-NEXT: # kill: def $al killed $al killed $eax |
| ; X64-NEXT: mulb %dl |
| ; X64-NEXT: addb %cl, %al |
| ; X64-NEXT: retq |
| ; |
| ; X32-LABEL: scalar_i8_signed_mem_mem: |
| ; X32: # %bb.0: |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax |
| ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx |
| ; X32-NEXT: movb (%ecx), %cl |
| ; X32-NEXT: movb (%eax), %ah |
| ; X32-NEXT: cmpb %ah, %cl |
| ; X32-NEXT: setle %dl |
| ; X32-NEXT: movb %ah, %ch |
| ; X32-NEXT: jg .LBB19_2 |
| ; X32-NEXT: # %bb.1: |
| ; X32-NEXT: movb %cl, %ch |
| ; X32-NEXT: .LBB19_2: |
| ; X32-NEXT: movb %cl, %al |
| ; X32-NEXT: jge .LBB19_4 |
| ; X32-NEXT: # %bb.3: |
| ; X32-NEXT: movb %ah, %al |
| ; X32-NEXT: .LBB19_4: |
| ; X32-NEXT: subb %ch, %al |
| ; X32-NEXT: addb %dl, %dl |
| ; X32-NEXT: decb %dl |
| ; X32-NEXT: shrb %al |
| ; X32-NEXT: mulb %dl |
| ; X32-NEXT: addb %cl, %al |
| ; X32-NEXT: retl |
| %a1 = load i8, i8* %a1_addr |
| %a2 = load i8, i8* %a2_addr |
| %t3 = icmp sgt i8 %a1, %a2 ; signed |
| %t4 = select i1 %t3, i8 -1, i8 1 |
| %t5 = select i1 %t3, i8 %a2, i8 %a1 |
| %t6 = select i1 %t3, i8 %a1, i8 %a2 |
| %t7 = sub i8 %t6, %t5 |
| %t8 = lshr i8 %t7, 1 |
| %t9 = mul nsw i8 %t8, %t4 ; signed |
| %a10 = add nsw i8 %t9, %a1 ; signed |
| ret i8 %a10 |
| } |