| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc -march=nvptx64 -mcpu=sm_100f -o - %s | FileCheck %s |
| |
| ; Module-level target configuration: the 64-bit NVPTX CUDA triple, with the data |
| ; layout string spelled out explicitly rather than left to the backend default. |
| target triple = "nvptx64-nvidia-cuda" |
| target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" |
| |
| ; Two chained llvm.maxnum.f32 calls, with the inner result used as the first operand |
| ; of the outer call. The assertions below expect a single three-input max.f32. |
| define void @test_fmaxnum3(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_fmaxnum3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fmaxnum3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fmaxnum3_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fmaxnum3_param_2]; |
| ; CHECK-NEXT: max.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_fmaxnum3_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r4; |
| ; CHECK-NEXT: ret; |
| entry: |
| %max_ab = call float @llvm.maxnum.f32(float %a, float %b) |
| %max_abc = call float @llvm.maxnum.f32(float %max_ab, float %c) |
| store float %max_abc, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; Two chained llvm.minnum.f32 calls, with the inner result used as the first operand |
| ; of the outer call. The assertions below expect a single three-input min.f32. |
| define void @test_fminnum3(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_fminnum3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fminnum3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fminnum3_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fminnum3_param_2]; |
| ; CHECK-NEXT: min.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_fminnum3_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r4; |
| ; CHECK-NEXT: ret; |
| entry: |
| %min_ab = call float @llvm.minnum.f32(float %a, float %b) |
| %min_abc = call float @llvm.minnum.f32(float %min_ab, float %c) |
| store float %min_abc, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; Two chained llvm.maximum.f32 calls, with the inner result used as the first operand |
| ; of the outer call. The assertions below expect a single three-input max.NaN.f32. |
| define void @test_fmaximum3(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_fmaximum3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fmaximum3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fmaximum3_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fmaximum3_param_2]; |
| ; CHECK-NEXT: max.NaN.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_fmaximum3_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r4; |
| ; CHECK-NEXT: ret; |
| entry: |
| %max_ab = call float @llvm.maximum.f32(float %a, float %b) |
| %max_abc = call float @llvm.maximum.f32(float %max_ab, float %c) |
| store float %max_abc, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; Two chained llvm.minimum.f32 calls, with the inner result used as the first operand |
| ; of the outer call. The assertions below expect a single three-input min.NaN.f32. |
| define void @test_fminimum3(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_fminimum3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fminimum3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fminimum3_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fminimum3_param_2]; |
| ; CHECK-NEXT: min.NaN.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_fminimum3_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r4; |
| ; CHECK-NEXT: ret; |
| entry: |
| %min_ab = call float @llvm.minimum.f32(float %a, float %b) |
| %min_abc = call float @llvm.minimum.f32(float %min_ab, float %c) |
| store float %min_abc, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; Two chained llvm.maximumnum.f32 calls, with the inner result used as the first |
| ; operand of the outer call. The assertions below expect a single three-input |
| ; max.f32, the same mnemonic expected for the llvm.maxnum.f32 case. |
| define void @test_fmaximumnum3(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_fmaximumnum3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fmaximumnum3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fmaximumnum3_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fmaximumnum3_param_2]; |
| ; CHECK-NEXT: max.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_fmaximumnum3_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r4; |
| ; CHECK-NEXT: ret; |
| entry: |
| %max_ab = call float @llvm.maximumnum.f32(float %a, float %b) |
| %max_abc = call float @llvm.maximumnum.f32(float %max_ab, float %c) |
| store float %max_abc, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; Two chained llvm.minimumnum.f32 calls, with the inner result used as the first |
| ; operand of the outer call. The assertions below expect a single three-input |
| ; min.f32, the same mnemonic expected for the llvm.minnum.f32 case. |
| define void @test_fminimumnum3(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_fminimumnum3( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fminimumnum3_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fminimumnum3_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fminimumnum3_param_2]; |
| ; CHECK-NEXT: min.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_fminimumnum3_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r4; |
| ; CHECK-NEXT: ret; |
| entry: |
| %min_ab = call float @llvm.minimumnum.f32(float %a, float %b) |
| %min_abc = call float @llvm.minimumnum.f32(float %min_ab, float %c) |
| store float %min_abc, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; Test commuted operands (second operand is the nested operation) |
| ; Here the inner maxnum(%b, %c) feeds the second operand of the outer maxnum. The |
| ; assertions still expect one three-input max.f32 whose sources are the three |
| ; parameter registers in parameter order. |
| define void @test_fmaxnum3_commuted(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_fmaxnum3_commuted( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<5>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_fmaxnum3_commuted_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_fmaxnum3_commuted_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [test_fmaxnum3_commuted_param_2]; |
| ; CHECK-NEXT: max.f32 %r4, %r1, %r2, %r3; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_fmaxnum3_commuted_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r4; |
| ; CHECK-NEXT: ret; |
| entry: |
| %max_bc = call float @llvm.maxnum.f32(float %b, float %c) |
| %max_abc = call float @llvm.maxnum.f32(float %a, float %max_bc) |
| store float %max_abc, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; NEGATIVE TEST: Mixed min/max operations should not combine |
| ; An llvm.minnum.f32 result feeds an llvm.maxnum.f32. The assertions expect the two |
| ; operations to stay separate: a two-input min.f32 followed by a two-input max.f32. |
| define void @test_mixed_minmax_no_combine(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_mixed_minmax_no_combine( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<6>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_mixed_minmax_no_combine_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_mixed_minmax_no_combine_param_1]; |
| ; CHECK-NEXT: min.f32 %r3, %r1, %r2; |
| ; CHECK-NEXT: ld.param.b32 %r4, [test_mixed_minmax_no_combine_param_2]; |
| ; CHECK-NEXT: max.f32 %r5, %r3, %r4; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_mixed_minmax_no_combine_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r5; |
| ; CHECK-NEXT: ret; |
| entry: |
| %min_ab = call float @llvm.minnum.f32(float %a, float %b) |
| %max_result = call float @llvm.maxnum.f32(float %min_ab, float %c) |
| store float %max_result, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; NEGATIVE TEST: Mixed maxnum/maximum operations should not combine |
| ; An llvm.maxnum.f32 result feeds an llvm.maximum.f32. The assertions expect the two |
| ; operations to stay separate: a two-input max.f32 followed by a two-input |
| ; max.NaN.f32. |
| define void @test_mixed_maxnum_maximum_no_combine(float %a, float %b, float %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_mixed_maxnum_maximum_no_combine( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<6>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_mixed_maxnum_maximum_no_combine_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_mixed_maxnum_maximum_no_combine_param_1]; |
| ; CHECK-NEXT: max.f32 %r3, %r1, %r2; |
| ; CHECK-NEXT: ld.param.b32 %r4, [test_mixed_maxnum_maximum_no_combine_param_2]; |
| ; CHECK-NEXT: max.NaN.f32 %r5, %r3, %r4; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_mixed_maxnum_maximum_no_combine_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r5; |
| ; CHECK-NEXT: ret; |
| entry: |
| %maxnum_ab = call float @llvm.maxnum.f32(float %a, float %b) |
| %maximum_result = call float @llvm.maximum.f32(float %maxnum_ab, float %c) |
| store float %maximum_result, ptr addrspace(1) %output, align 4 |
| ret void |
| } |
| |
| ; NEGATIVE TEST: f16 should not be combined (only f32 supported) |
| ; Same chained-maxnum shape as the f32 tests, but on half operands. The assertions |
| ; expect two separate two-input max.f16 instructions on 16-bit registers. |
| define void @test_f16_no_combine(half %a, half %b, half %c, ptr addrspace(1) %output) { |
| ; CHECK-LABEL: test_f16_no_combine( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<6>; |
| ; CHECK-NEXT: .reg .b64 %rd<2>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b16 %rs1, [test_f16_no_combine_param_0]; |
| ; CHECK-NEXT: ld.param.b16 %rs2, [test_f16_no_combine_param_1]; |
| ; CHECK-NEXT: max.f16 %rs3, %rs1, %rs2; |
| ; CHECK-NEXT: ld.param.b16 %rs4, [test_f16_no_combine_param_2]; |
| ; CHECK-NEXT: max.f16 %rs5, %rs3, %rs4; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_f16_no_combine_param_3]; |
| ; CHECK-NEXT: st.global.b16 [%rd1], %rs5; |
| ; CHECK-NEXT: ret; |
| entry: |
| %max_ab = call half @llvm.maxnum.f16(half %a, half %b) |
| %max_abc = call half @llvm.maxnum.f16(half %max_ab, half %c) |
| store half %max_abc, ptr addrspace(1) %output, align 2 |
| ret void |
| } |
| |
| ; NEGATIVE TEST: Multiple uses of intermediate result should not combine |
| ; The inner maxnum result is both stored to %output1 and fed to the outer maxnum. |
| ; The assertions expect two separate two-input max.f32 instructions, with the |
| ; intermediate register written through the first pointer and the final register |
| ; through the second. |
| define void @test_multiple_uses_no_combine(float %a, float %b, float %c, ptr addrspace(1) %output1, ptr addrspace(1) %output2) { |
| ; CHECK-LABEL: test_multiple_uses_no_combine( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<6>; |
| ; CHECK-NEXT: .reg .b64 %rd<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: // %entry |
| ; CHECK-NEXT: ld.param.b32 %r1, [test_multiple_uses_no_combine_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [test_multiple_uses_no_combine_param_1]; |
| ; CHECK-NEXT: max.f32 %r3, %r1, %r2; |
| ; CHECK-NEXT: ld.param.b32 %r4, [test_multiple_uses_no_combine_param_2]; |
| ; CHECK-NEXT: max.f32 %r5, %r3, %r4; |
| ; CHECK-NEXT: ld.param.b64 %rd1, [test_multiple_uses_no_combine_param_3]; |
| ; CHECK-NEXT: st.global.b32 [%rd1], %r3; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [test_multiple_uses_no_combine_param_4]; |
| ; CHECK-NEXT: st.global.b32 [%rd2], %r5; |
| ; CHECK-NEXT: ret; |
| entry: |
| %max_ab = call float @llvm.maxnum.f32(float %a, float %b) |
| %max_abc = call float @llvm.maxnum.f32(float %max_ab, float %c) |
| ; Multiple uses of %max_ab should prevent combining |
| store float %max_ab, ptr addrspace(1) %output1, align 4 |
| store float %max_abc, ptr addrspace(1) %output2, align 4 |
| ret void |
| } |
| |
| ; Declare all the intrinsics we need |
| ; Six f32 min/max flavours plus the f16 maxnum used by the f16 negative test; every |
| ; declaration references attribute group #0. |
| declare float @llvm.maxnum.f32(float, float) #0 |
| declare float @llvm.minnum.f32(float, float) #0 |
| declare float @llvm.maximum.f32(float, float) #0 |
| declare float @llvm.minimum.f32(float, float) #0 |
| declare float @llvm.maximumnum.f32(float, float) #0 |
| declare float @llvm.minimumnum.f32(float, float) #0 |
| declare half @llvm.maxnum.f16(half, half) #0 |
| |
| ; Attribute group shared by all of the intrinsic declarations above. |
| attributes #0 = { nounwind readnone speculatable willreturn } |