| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu \ |
| ; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-P9 |
| ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu \ |
| ; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-P8 |
| |
| ; Basic comparison with branch: `fcmp olt` on two half arguments feeds a |
| ; conditional branch; the function returns 1 on the true edge (%if.then) |
| ; and 0 on the false edge (%if.else). |
| ; In the pwr9 assertions both operands are converted inline with xscvhpdp; |
| ; in the pwr8 assertions each operand goes through a call to __extendhfsf2, |
| ; which is why that version sets up a stack frame and spills r30/f31. |
| define i32 @test_br_cc_olt(half %a, half %b) nounwind { |
| ; CHECK-P9-LABEL: test_br_cc_olt: |
| ; CHECK-P9: # %bb.0: |
| ; CHECK-P9-NEXT: clrlwi 3, 3, 16 |
| ; CHECK-P9-NEXT: clrlwi 4, 4, 16 |
| ; CHECK-P9-NEXT: mtfprwz 0, 4 |
| ; CHECK-P9-NEXT: mtfprwz 1, 3 |
| ; CHECK-P9-NEXT: xscvhpdp 0, 0 |
| ; CHECK-P9-NEXT: xscvhpdp 1, 1 |
| ; CHECK-P9-NEXT: fcmpu 0, 1, 0 |
| ; CHECK-P9-NEXT: bge 0, .LBB0_2 |
| ; CHECK-P9-NEXT: # %bb.1: # %if.then |
| ; CHECK-P9-NEXT: li 3, 1 |
| ; CHECK-P9-NEXT: blr |
| ; CHECK-P9-NEXT: .LBB0_2: # %if.else |
| ; CHECK-P9-NEXT: li 3, 0 |
| ; CHECK-P9-NEXT: blr |
| ; |
| ; CHECK-P8-LABEL: test_br_cc_olt: |
| ; CHECK-P8: # %bb.0: |
| ; CHECK-P8-NEXT: mflr 0 |
| ; CHECK-P8-NEXT: std 30, -24(1) # 8-byte Folded Spill |
| ; CHECK-P8-NEXT: stfd 31, -8(1) # 8-byte Folded Spill |
| ; CHECK-P8-NEXT: stdu 1, -64(1) |
| ; CHECK-P8-NEXT: mr 30, 3 |
| ; CHECK-P8-NEXT: clrldi 3, 4, 48 |
| ; CHECK-P8-NEXT: std 0, 80(1) |
| ; CHECK-P8-NEXT: bl __extendhfsf2 |
| ; CHECK-P8-NEXT: nop |
| ; CHECK-P8-NEXT: clrldi 3, 30, 48 |
| ; CHECK-P8-NEXT: fmr 31, 1 |
| ; CHECK-P8-NEXT: bl __extendhfsf2 |
| ; CHECK-P8-NEXT: nop |
| ; CHECK-P8-NEXT: fcmpu 0, 1, 31 |
| ; CHECK-P8-NEXT: bge 0, .LBB0_2 |
| ; CHECK-P8-NEXT: # %bb.1: # %if.then |
| ; CHECK-P8-NEXT: li 3, 1 |
| ; CHECK-P8-NEXT: b .LBB0_3 |
| ; CHECK-P8-NEXT: .LBB0_2: # %if.else |
| ; CHECK-P8-NEXT: li 3, 0 |
| ; CHECK-P8-NEXT: .LBB0_3: # %if.then |
| ; CHECK-P8-NEXT: addi 1, 1, 64 |
| ; CHECK-P8-NEXT: ld 0, 16(1) |
| ; CHECK-P8-NEXT: lfd 31, -8(1) # 8-byte Folded Reload |
| ; CHECK-P8-NEXT: ld 30, -24(1) # 8-byte Folded Reload |
| ; CHECK-P8-NEXT: mtlr 0 |
| ; CHECK-P8-NEXT: blr |
| ; olt is an ordered predicate: it is false if either operand is NaN, so NaN |
| ; inputs take the %if.else edge. |
| %cmp = fcmp olt half %a, %b |
| br i1 %cmp, label %if.then, label %if.else |
| if.then: |
| ret i32 1 |
| if.else: |
| ret i32 0 |
| } |
| |
| ; Test with constant: `fcmp ogt` of a half argument against the half |
| ; constant 0xH0000 feeds a conditional branch; returns 1 on the true edge |
| ; (%if.then) and 0 on the false edge (%if.else). |
| ; Both sets of assertions produce the zero operand with xxlxor; only the |
| ; pwr8 version calls __extendhfsf2 to convert %a. |
| define i32 @test_br_cc_constant(half %a) nounwind { |
| ; CHECK-P9-LABEL: test_br_cc_constant: |
| ; CHECK-P9: # %bb.0: |
| ; CHECK-P9-NEXT: clrlwi 3, 3, 16 |
| ; CHECK-P9-NEXT: xxlxor 1, 1, 1 |
| ; CHECK-P9-NEXT: mtfprwz 0, 3 |
| ; CHECK-P9-NEXT: xscvhpdp 0, 0 |
| ; CHECK-P9-NEXT: fcmpu 0, 0, 1 |
| ; CHECK-P9-NEXT: ble 0, .LBB1_2 |
| ; CHECK-P9-NEXT: # %bb.1: # %if.then |
| ; CHECK-P9-NEXT: li 3, 1 |
| ; CHECK-P9-NEXT: blr |
| ; CHECK-P9-NEXT: .LBB1_2: # %if.else |
| ; CHECK-P9-NEXT: li 3, 0 |
| ; CHECK-P9-NEXT: blr |
| ; |
| ; CHECK-P8-LABEL: test_br_cc_constant: |
| ; CHECK-P8: # %bb.0: |
| ; CHECK-P8-NEXT: mflr 0 |
| ; CHECK-P8-NEXT: stdu 1, -32(1) |
| ; CHECK-P8-NEXT: clrldi 3, 3, 48 |
| ; CHECK-P8-NEXT: std 0, 48(1) |
| ; CHECK-P8-NEXT: bl __extendhfsf2 |
| ; CHECK-P8-NEXT: nop |
| ; CHECK-P8-NEXT: xxlxor 0, 0, 0 |
| ; CHECK-P8-NEXT: fcmpu 0, 1, 0 |
| ; CHECK-P8-NEXT: ble 0, .LBB1_2 |
| ; CHECK-P8-NEXT: # %bb.1: # %if.then |
| ; CHECK-P8-NEXT: li 3, 1 |
| ; CHECK-P8-NEXT: b .LBB1_3 |
| ; CHECK-P8-NEXT: .LBB1_2: # %if.else |
| ; CHECK-P8-NEXT: li 3, 0 |
| ; CHECK-P8-NEXT: .LBB1_3: # %if.then |
| ; CHECK-P8-NEXT: addi 1, 1, 32 |
| ; CHECK-P8-NEXT: ld 0, 16(1) |
| ; CHECK-P8-NEXT: mtlr 0 |
| ; CHECK-P8-NEXT: blr |
| ; 0xH0000 is the all-zero half bit pattern (+0.0). ogt is an ordered |
| ; predicate, so a NaN %a takes the %if.else edge. |
| %cmp = fcmp ogt half %a, 0xH0000 |
| br i1 %cmp, label %if.then, label %if.else |
| if.then: |
| ret i32 1 |
| if.else: |
| ret i32 0 |
| } |
| |
| ; Vector reduction + branch: the result of llvm.vector.reduce.fmin on a |
| ; constant <4 x half> zeroinitializer is compared (`fcmp ole`) with the half |
| ; argument and drives a conditional branch; returns 0 on the true edge |
| ; (%taken) and 1 on the false edge (%not_taken). Unlike the tests above, this |
| ; function is fastcc and returns i16. |
| ; Neither set of assertions shows a reduction sequence: the converted %arg is |
| ; compared against a register zeroed with xxlxor, then compared with itself; |
| ; both `bc 12, ...` branches target %not_taken. |
| ; NOTE(review): the self-compare followed by `bc 12, 3` looks like the |
| ; unordered (NaN) check for the ordered predicate - confirm against the ISA. |
| define fastcc i16 @test_vector_reduce_br(half %arg) nounwind { |
| ; CHECK-P9-LABEL: test_vector_reduce_br: |
| ; CHECK-P9: # %bb.0: |
| ; CHECK-P9-NEXT: clrlwi 3, 3, 16 |
| ; CHECK-P9-NEXT: xxlxor 1, 1, 1 |
| ; CHECK-P9-NEXT: mtfprwz 0, 3 |
| ; CHECK-P9-NEXT: xscvhpdp 0, 0 |
| ; CHECK-P9-NEXT: fcmpu 0, 0, 1 |
| ; CHECK-P9-NEXT: bc 12, 0, .LBB2_3 |
| ; CHECK-P9-NEXT: # %bb.1: |
| ; CHECK-P9-NEXT: fcmpu 0, 0, 0 |
| ; CHECK-P9-NEXT: bc 12, 3, .LBB2_3 |
| ; CHECK-P9-NEXT: # %bb.2: # %taken |
| ; CHECK-P9-NEXT: li 3, 0 |
| ; CHECK-P9-NEXT: blr |
| ; CHECK-P9-NEXT: .LBB2_3: # %not_taken |
| ; CHECK-P9-NEXT: li 3, 1 |
| ; CHECK-P9-NEXT: blr |
| ; |
| ; CHECK-P8-LABEL: test_vector_reduce_br: |
| ; CHECK-P8: # %bb.0: |
| ; CHECK-P8-NEXT: mflr 0 |
| ; CHECK-P8-NEXT: stdu 1, -32(1) |
| ; CHECK-P8-NEXT: clrldi 3, 3, 48 |
| ; CHECK-P8-NEXT: std 0, 48(1) |
| ; CHECK-P8-NEXT: bl __extendhfsf2 |
| ; CHECK-P8-NEXT: nop |
| ; CHECK-P8-NEXT: xxlxor 0, 0, 0 |
| ; CHECK-P8-NEXT: fcmpu 0, 1, 0 |
| ; CHECK-P8-NEXT: bc 12, 0, .LBB2_3 |
| ; CHECK-P8-NEXT: # %bb.1: |
| ; CHECK-P8-NEXT: fcmpu 0, 1, 1 |
| ; CHECK-P8-NEXT: bc 12, 3, .LBB2_3 |
| ; CHECK-P8-NEXT: # %bb.2: # %taken |
| ; CHECK-P8-NEXT: li 3, 0 |
| ; CHECK-P8-NEXT: b .LBB2_4 |
| ; CHECK-P8-NEXT: .LBB2_3: # %not_taken |
| ; CHECK-P8-NEXT: li 3, 1 |
| ; CHECK-P8-NEXT: .LBB2_4: # %taken |
| ; CHECK-P8-NEXT: addi 1, 1, 32 |
| ; CHECK-P8-NEXT: ld 0, 16(1) |
| ; CHECK-P8-NEXT: mtlr 0 |
| ; CHECK-P8-NEXT: blr |
| ; The reduction input is an all-zero constant vector, so %reduce does not |
| ; depend on %arg; the branch outcome is decided by %arg alone. |
| %reduce = tail call half @llvm.vector.reduce.fmin.v4f16(<4 x half> zeroinitializer) |
| %cmp = fcmp ole half %reduce, %arg |
| br i1 %cmp, label %taken, label %not_taken |
| taken: |
| ret i16 0 |
| not_taken: |
| ret i16 1 |
| } |
| |
| ; Floating-point minimum reduction intrinsic called by @test_vector_reduce_br. |
| declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>) |