; blob: 2726e287f5df7eb0f4e0d6f101a70feb75b42b77 [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr9 < %s | FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr8 < %s | FileCheck %s --check-prefix=CHECK-P8
; Basic comparison with branch.
; Branches on an ordered less-than compare of two half arguments: the function
; returns 1 when `fcmp olt half %a, %b` is true and 0 otherwise.
; - The pwr9 assertions expect both operands to be masked to 16 bits, moved to
;   FP registers and widened inline with xscvhpdp before a single fcmpu.
; - The pwr8 assertions expect each operand to be widened by a call to
;   __extendhfsf2; the first result is copied to register 31 (fmr 31, 1) so it
;   is still available after the second call, which is why that register is
;   spilled and reloaded in the prologue/epilogue.
define i32 @test_br_cc_olt(half %a, half %b) nounwind {
; CHECK-P9-LABEL: test_br_cc_olt:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: clrlwi 3, 3, 16
; CHECK-P9-NEXT: clrlwi 4, 4, 16
; CHECK-P9-NEXT: mtfprwz 0, 4
; CHECK-P9-NEXT: mtfprwz 1, 3
; CHECK-P9-NEXT: xscvhpdp 0, 0
; CHECK-P9-NEXT: xscvhpdp 1, 1
; CHECK-P9-NEXT: fcmpu 0, 1, 0
; CHECK-P9-NEXT: bge 0, .LBB0_2
; CHECK-P9-NEXT: # %bb.1: # %if.then
; CHECK-P9-NEXT: li 3, 1
; CHECK-P9-NEXT: blr
; CHECK-P9-NEXT: .LBB0_2: # %if.else
; CHECK-P9-NEXT: li 3, 0
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_br_cc_olt:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: mflr 0
; CHECK-P8-NEXT: std 30, -24(1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stfd 31, -8(1) # 8-byte Folded Spill
; CHECK-P8-NEXT: stdu 1, -64(1)
; CHECK-P8-NEXT: mr 30, 3
; CHECK-P8-NEXT: clrldi 3, 4, 48
; CHECK-P8-NEXT: std 0, 80(1)
; CHECK-P8-NEXT: bl __extendhfsf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: clrldi 3, 30, 48
; CHECK-P8-NEXT: fmr 31, 1
; CHECK-P8-NEXT: bl __extendhfsf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: fcmpu 0, 1, 31
; CHECK-P8-NEXT: bge 0, .LBB0_2
; CHECK-P8-NEXT: # %bb.1: # %if.then
; CHECK-P8-NEXT: li 3, 1
; CHECK-P8-NEXT: b .LBB0_3
; CHECK-P8-NEXT: .LBB0_2: # %if.else
; CHECK-P8-NEXT: li 3, 0
; CHECK-P8-NEXT: .LBB0_3: # %if.then
; CHECK-P8-NEXT: addi 1, 1, 64
; CHECK-P8-NEXT: ld 0, 16(1)
; CHECK-P8-NEXT: lfd 31, -8(1) # 8-byte Folded Reload
; CHECK-P8-NEXT: ld 30, -24(1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr 0
; CHECK-P8-NEXT: blr
; IR under test: the i1 compare result feeds the conditional branch directly.
%cmp = fcmp olt half %a, %b
br i1 %cmp, label %if.then, label %if.else
; Compare was true.
if.then:
ret i32 1
; Compare was false.
if.else:
ret i32 0
}
; Test with constant.
; Branches on an ordered greater-than compare of a half argument against the
; half constant 0xH0000: the function returns 1 when `fcmp ogt half %a, 0xH0000`
; is true and 0 otherwise.
; - The pwr9 assertions expect the constant operand to be produced with xxlxor
;   and only the argument to be widened (one xscvhpdp).
; - The pwr8 assertions expect a single call to __extendhfsf2 for the argument,
;   with the constant operand again produced with xxlxor.
define i32 @test_br_cc_constant(half %a) nounwind {
; CHECK-P9-LABEL: test_br_cc_constant:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: clrlwi 3, 3, 16
; CHECK-P9-NEXT: xxlxor 1, 1, 1
; CHECK-P9-NEXT: mtfprwz 0, 3
; CHECK-P9-NEXT: xscvhpdp 0, 0
; CHECK-P9-NEXT: fcmpu 0, 0, 1
; CHECK-P9-NEXT: ble 0, .LBB1_2
; CHECK-P9-NEXT: # %bb.1: # %if.then
; CHECK-P9-NEXT: li 3, 1
; CHECK-P9-NEXT: blr
; CHECK-P9-NEXT: .LBB1_2: # %if.else
; CHECK-P9-NEXT: li 3, 0
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_br_cc_constant:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: mflr 0
; CHECK-P8-NEXT: stdu 1, -32(1)
; CHECK-P8-NEXT: clrldi 3, 3, 48
; CHECK-P8-NEXT: std 0, 48(1)
; CHECK-P8-NEXT: bl __extendhfsf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxlxor 0, 0, 0
; CHECK-P8-NEXT: fcmpu 0, 1, 0
; CHECK-P8-NEXT: ble 0, .LBB1_2
; CHECK-P8-NEXT: # %bb.1: # %if.then
; CHECK-P8-NEXT: li 3, 1
; CHECK-P8-NEXT: b .LBB1_3
; CHECK-P8-NEXT: .LBB1_2: # %if.else
; CHECK-P8-NEXT: li 3, 0
; CHECK-P8-NEXT: .LBB1_3: # %if.then
; CHECK-P8-NEXT: addi 1, 1, 32
; CHECK-P8-NEXT: ld 0, 16(1)
; CHECK-P8-NEXT: mtlr 0
; CHECK-P8-NEXT: blr
; IR under test: one compare operand is an immediate half constant.
%cmp = fcmp ogt half %a, 0xH0000
br i1 %cmp, label %if.then, label %if.else
; Compare was true.
if.then:
ret i32 1
; Compare was false.
if.else:
ret i32 0
}
; Vector reduction + branch.
; The left-hand compare operand is the result of llvm.vector.reduce.fmin.v4f16
; applied to an all-zero <4 x half> vector; it is compared `ole` against the
; half argument. The function (fastcc, i16 result) returns 0 on the %taken path
; and 1 on the %not_taken path.
; - Neither set of assertions contains any reduction code: the reduced operand
;   shows up only as a register zeroed with xxlxor.
; - Both sets expect two fcmpu/bc pairs that each branch to %not_taken: first a
;   compare of the widened argument against the zeroed register, then a compare
;   of the widened argument with itself.
; - As in the other tests, the pwr9 assertions widen the argument inline with
;   xscvhpdp, while the pwr8 assertions call __extendhfsf2.
define fastcc i16 @test_vector_reduce_br(half %arg) nounwind {
; CHECK-P9-LABEL: test_vector_reduce_br:
; CHECK-P9: # %bb.0:
; CHECK-P9-NEXT: clrlwi 3, 3, 16
; CHECK-P9-NEXT: xxlxor 1, 1, 1
; CHECK-P9-NEXT: mtfprwz 0, 3
; CHECK-P9-NEXT: xscvhpdp 0, 0
; CHECK-P9-NEXT: fcmpu 0, 0, 1
; CHECK-P9-NEXT: bc 12, 0, .LBB2_3
; CHECK-P9-NEXT: # %bb.1:
; CHECK-P9-NEXT: fcmpu 0, 0, 0
; CHECK-P9-NEXT: bc 12, 3, .LBB2_3
; CHECK-P9-NEXT: # %bb.2: # %taken
; CHECK-P9-NEXT: li 3, 0
; CHECK-P9-NEXT: blr
; CHECK-P9-NEXT: .LBB2_3: # %not_taken
; CHECK-P9-NEXT: li 3, 1
; CHECK-P9-NEXT: blr
;
; CHECK-P8-LABEL: test_vector_reduce_br:
; CHECK-P8: # %bb.0:
; CHECK-P8-NEXT: mflr 0
; CHECK-P8-NEXT: stdu 1, -32(1)
; CHECK-P8-NEXT: clrldi 3, 3, 48
; CHECK-P8-NEXT: std 0, 48(1)
; CHECK-P8-NEXT: bl __extendhfsf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxlxor 0, 0, 0
; CHECK-P8-NEXT: fcmpu 0, 1, 0
; CHECK-P8-NEXT: bc 12, 0, .LBB2_3
; CHECK-P8-NEXT: # %bb.1:
; CHECK-P8-NEXT: fcmpu 0, 1, 1
; CHECK-P8-NEXT: bc 12, 3, .LBB2_3
; CHECK-P8-NEXT: # %bb.2: # %taken
; CHECK-P8-NEXT: li 3, 0
; CHECK-P8-NEXT: b .LBB2_4
; CHECK-P8-NEXT: .LBB2_3: # %not_taken
; CHECK-P8-NEXT: li 3, 1
; CHECK-P8-NEXT: .LBB2_4: # %taken
; CHECK-P8-NEXT: addi 1, 1, 32
; CHECK-P8-NEXT: ld 0, 16(1)
; CHECK-P8-NEXT: mtlr 0
; CHECK-P8-NEXT: blr
; IR under test: the reduction input is a constant all-zero vector.
%reduce = tail call half @llvm.vector.reduce.fmin.v4f16(<4 x half> zeroinitializer)
; The reduction result is the left operand; the argument is the right operand.
%cmp = fcmp ole half %reduce, %arg
br i1 %cmp, label %taken, label %not_taken
; Compare was true.
taken:
ret i16 0
; Compare was false.
not_taken:
ret i16 1
}
; Intrinsic called by test_vector_reduce_br: reduces a <4 x half> vector to a
; single half value.
declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)