| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -mcpu=sm_100 < %s | FileCheck %s |
| ; RUN: %if ptxas-sm_100 %{ llc < %s -mcpu=sm_100 | %ptxas-verify -arch=sm_100 %} |
| |
| ; NOTE: Currently only scalar SETP predicate inversions are optimized by MachineCSE. |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; Packed-half case: both lanes of a <2 x half> are compared against zero with |
| ; fcmp oeq, the two lane results are ANDed into one i1, and that same i1 feeds |
| ; two conditional branches. The function returns 0 when both lanes compare |
| ; equal to zero and 1 otherwise. |
| ; The expected PTX below has one setp.eq.f16x2 writing two predicates, an |
| ; and.pred combining them, and an explicit not.pred feeding the second branch. |
| define i32 @test_float_f16x2_eq(i32 %arg) { |
| ; CHECK-LABEL: test_float_f16x2_eq( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_float_f16x2_eq_param_0]; |
| ; CHECK-NEXT: mov.b32 %r2, 0; |
| ; CHECK-NEXT: setp.eq.f16x2 %p2|%p3, %r1, %r2; |
| ; CHECK-NEXT: and.pred %p1, %p2, %p3; |
| ; CHECK-NEXT: @%p1 bra $L__BB0_2; |
| ; CHECK-NEXT: // %bb.1: // %then |
| ; CHECK-NEXT: mov.b32 %r2, 1; |
| ; CHECK-NEXT: $L__BB0_2: // %merge1 |
| ; CHECK-NEXT: not.pred %p4, %p1; |
| ; CHECK-NEXT: @%p4 bra $L__BB0_4; |
| ; CHECK-NEXT: // %bb.3: // %else |
| ; CHECK-NEXT: mov.b32 %r2, 0; |
| ; CHECK-NEXT: $L__BB0_4: // %merge2 |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| entry: |
| ; Reinterpret the 32-bit argument as two packed half lanes. |
| %a = bitcast i32 %arg to <2 x half> |
| ; All-zero bit pattern reinterpreted the same way, used as the comparison operand. |
| %zero = bitcast i32 0 to <2 x half> |
| %cmp = fcmp oeq <2 x half> %a, %zero |
| %e0 = extractelement <2 x i1> %cmp, i32 0 |
| %e1 = extractelement <2 x i1> %cmp, i32 1 |
| ; True only when both lanes compared ordered-equal. |
| %and = and i1 %e0, %e1 |
| ; First use of %and: true skips %then and goes straight to %merge1. |
| br i1 %and, label %merge1, label %then |
| |
| then: |
| br label %merge1 |
| |
| merge1: |
| %phi1 = phi i32 [ 1, %then ], [ 0, %entry ] |
| ; Second use of the same %and: true goes through %else, false goes directly to %merge2. |
| br i1 %and, label %else, label %merge2 |
| |
| else: |
| br label %merge2 |
| |
| merge2: |
| ; Yields %phi1 when arriving directly from %merge1, or 0 when arriving via %else. |
| %phi2 = phi i32 [ %phi1, %merge1 ], [ 0, %else ] |
| ret i32 %phi2 |
| } |
| |
| ; Packed-bfloat case: same control-flow shape as the <2 x half> test, but the |
| ; compare is fcmp oeq on <2 x bfloat>. Both lane results are ANDed into one i1 |
| ; that feeds two conditional branches. The function returns 0 when both lanes |
| ; compare equal to zero and 1 otherwise. |
| ; The expected PTX below has one setp.eq.bf16x2 writing two predicates, an |
| ; and.pred combining them, and an explicit not.pred feeding the second branch. |
| define i32 @test_float_bf16x2_eq(i32 %arg) { |
| ; CHECK-LABEL: test_float_bf16x2_eq( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .pred %p<5>; |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_float_bf16x2_eq_param_0]; |
| ; CHECK-NEXT: mov.b32 %r2, 0; |
| ; CHECK-NEXT: setp.eq.bf16x2 %p2|%p3, %r1, %r2; |
| ; CHECK-NEXT: and.pred %p1, %p2, %p3; |
| ; CHECK-NEXT: @%p1 bra $L__BB1_2; |
| ; CHECK-NEXT: // %bb.1: // %then |
| ; CHECK-NEXT: mov.b32 %r2, 1; |
| ; CHECK-NEXT: $L__BB1_2: // %merge1 |
| ; CHECK-NEXT: not.pred %p4, %p1; |
| ; CHECK-NEXT: @%p4 bra $L__BB1_4; |
| ; CHECK-NEXT: // %bb.3: // %else |
| ; CHECK-NEXT: mov.b32 %r2, 0; |
| ; CHECK-NEXT: $L__BB1_4: // %merge2 |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| entry: |
| ; Reinterpret the 32-bit argument as two packed bfloat lanes. |
| %a = bitcast i32 %arg to <2 x bfloat> |
| ; All-zero bit pattern reinterpreted the same way, used as the comparison operand. |
| %zero = bitcast i32 0 to <2 x bfloat> |
| %cmp = fcmp oeq <2 x bfloat> %a, %zero |
| %e0 = extractelement <2 x i1> %cmp, i32 0 |
| %e1 = extractelement <2 x i1> %cmp, i32 1 |
| ; True only when both lanes compared ordered-equal. |
| %and = and i1 %e0, %e1 |
| ; First use of %and: true skips %then and goes straight to %merge1. |
| br i1 %and, label %merge1, label %then |
| |
| then: |
| br label %merge1 |
| |
| merge1: |
| %phi1 = phi i32 [ 1, %then ], [ 0, %entry ] |
| ; Second use of the same %and: true goes through %else, false goes directly to %merge2. |
| br i1 %and, label %else, label %merge2 |
| |
| else: |
| br label %merge2 |
| |
| merge2: |
| ; Yields %phi1 when arriving directly from %merge1, or 0 when arriving via %else. |
| %phi2 = phi i32 [ %phi1, %merge1 ], [ 0, %else ] |
| ret i32 %phi2 |
| } |