; blob: 32099625143f1989caa4027791b379b2dfe6cd47
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mcpu=sm_100 < %s | FileCheck %s
; RUN: %if ptxas-sm_100 %{ llc < %s -mcpu=sm_100 | %ptxas-verify -arch=sm_100 %}
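; NOTE: Currently only scalar SETP predicate inversions are optimized by MachineCSE.
; The packed (f16x2 / bf16x2) comparisons in this file are therefore expected to
; keep an explicit not.pred for the branch that needs the inverted predicate.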
target triple = "nvptx64-nvidia-cuda"
; Packed <2 x half> case. %arg is reinterpreted as two half lanes, each lane is
; compared 'oeq' against the all-zero bit pattern, and the two lane results are
; ANDed into %and. %and then feeds two conditional branches whose taken edges
; need opposite senses of the predicate. The expected PTX below computes the
; predicate once (setp.eq.f16x2 followed by and.pred) and derives the inverted
; sense for the second branch with not.pred.
; Result: 0 when both lanes compare equal (%and true), 1 otherwise.
define i32 @test_float_f16x2_eq(i32 %arg) {
; CHECK-LABEL: test_float_f16x2_eq(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b32 %r1, [test_float_f16x2_eq_param_0];
; CHECK-NEXT: mov.b32 %r2, 0;
; CHECK-NEXT: setp.eq.f16x2 %p2|%p3, %r1, %r2;
; CHECK-NEXT: and.pred %p1, %p2, %p3;
; CHECK-NEXT: @%p1 bra $L__BB0_2;
; CHECK-NEXT: // %bb.1: // %then
; CHECK-NEXT: mov.b32 %r2, 1;
; CHECK-NEXT: $L__BB0_2: // %merge1
; CHECK-NEXT: not.pred %p4, %p1;
; CHECK-NEXT: @%p4 bra $L__BB0_4;
; CHECK-NEXT: // %bb.3: // %else
; CHECK-NEXT: mov.b32 %r2, 0;
; CHECK-NEXT: $L__BB0_4: // %merge2
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
entry:
; Reinterpret the 32-bit argument as two packed half values.
%a = bitcast i32 %arg to <2 x half>
; Comparison operand: both lanes carry an all-zero bit pattern.
%zero = bitcast i32 0 to <2 x half>
; Lane-wise ordered-equal compare; yields one i1 per lane.
%cmp = fcmp oeq <2 x half> %a, %zero
%e0 = extractelement <2 x i1> %cmp, i32 0
%e1 = extractelement <2 x i1> %cmp, i32 1
; True only when both lanes compared equal.
%and = and i1 %e0, %e1
; First use of %and: the true edge skips %then.
br i1 %and, label %merge1, label %then
then:
br label %merge1
merge1:
; 1 if %then ran (%and false), 0 if reached directly from %entry (%and true).
%phi1 = phi i32 [ 1, %then ], [ 0, %entry ]
; Second use of %and: here the true edge enters %else rather than skipping it,
; so the emitted branch needs the opposite predicate sense from the first one.
br i1 %and, label %else, label %merge2
else:
br label %merge2
merge2:
; %phi1 when %else was skipped, 0 when %else ran.
%phi2 = phi i32 [ %phi1, %merge1 ], [ 0, %else ]
ret i32 %phi2
}
; Packed <2 x bfloat> case. %arg is reinterpreted as two bfloat lanes, each lane
; is compared 'oeq' against the all-zero bit pattern, and the two lane results
; are ANDed into %and. %and then feeds two conditional branches whose taken
; edges need opposite senses of the predicate. The expected PTX below computes
; the predicate once (setp.eq.bf16x2 followed by and.pred) and derives the
; inverted sense for the second branch with not.pred.
; Result: 0 when both lanes compare equal (%and true), 1 otherwise.
define i32 @test_float_bf16x2_eq(i32 %arg) {
; CHECK-LABEL: test_float_bf16x2_eq(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<5>;
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b32 %r1, [test_float_bf16x2_eq_param_0];
; CHECK-NEXT: mov.b32 %r2, 0;
; CHECK-NEXT: setp.eq.bf16x2 %p2|%p3, %r1, %r2;
; CHECK-NEXT: and.pred %p1, %p2, %p3;
; CHECK-NEXT: @%p1 bra $L__BB1_2;
; CHECK-NEXT: // %bb.1: // %then
; CHECK-NEXT: mov.b32 %r2, 1;
; CHECK-NEXT: $L__BB1_2: // %merge1
; CHECK-NEXT: not.pred %p4, %p1;
; CHECK-NEXT: @%p4 bra $L__BB1_4;
; CHECK-NEXT: // %bb.3: // %else
; CHECK-NEXT: mov.b32 %r2, 0;
; CHECK-NEXT: $L__BB1_4: // %merge2
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
entry:
; Reinterpret the 32-bit argument as two packed bfloat values.
%a = bitcast i32 %arg to <2 x bfloat>
; Comparison operand: both lanes carry an all-zero bit pattern.
%zero = bitcast i32 0 to <2 x bfloat>
; Lane-wise ordered-equal compare; yields one i1 per lane.
%cmp = fcmp oeq <2 x bfloat> %a, %zero
%e0 = extractelement <2 x i1> %cmp, i32 0
%e1 = extractelement <2 x i1> %cmp, i32 1
; True only when both lanes compared equal.
%and = and i1 %e0, %e1
; First use of %and: the true edge skips %then.
br i1 %and, label %merge1, label %then
then:
br label %merge1
merge1:
; 1 if %then ran (%and false), 0 if reached directly from %entry (%and true).
%phi1 = phi i32 [ 1, %then ], [ 0, %entry ]
; Second use of %and: here the true edge enters %else rather than skipping it,
; so the emitted branch needs the opposite predicate sense from the first one.
br i1 %and, label %else, label %merge2
else:
br label %merge2
merge2:
; %phi1 when %else was skipped, 0 when %else ran.
%phi2 = phi i32 [ %phi1, %merge1 ], [ 0, %else ]
ret i32 %phi2
}