| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %} |
| |
; div_full: exercises the llvm.nvvm.div.full and llvm.nvvm.div.full.ftz
; intrinsics, each once with a register divisor (%b) and once with a constant
; divisor (3.0 and 4.0). The expected PTX is a chain of four divides:
; div.full.f32 for the plain intrinsic and div.full.ftz.f32 for the .ftz one,
; with each constant divisor first materialized by a mov.b32 of its hex
; float encoding (0f40400000 = 3.0, 0f40800000 = 4.0).
| define float @div_full(float %a, float %b) { |
| ; CHECK-LABEL: div_full( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<9>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [div_full_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [div_full_param_1]; |
| ; CHECK-NEXT: div.full.f32 %r3, %r1, %r2; |
| ; CHECK-NEXT: mov.b32 %r4, 0f40400000; |
| ; CHECK-NEXT: div.full.f32 %r5, %r3, %r4; |
| ; CHECK-NEXT: div.full.ftz.f32 %r6, %r5, %r2; |
| ; CHECK-NEXT: mov.b32 %r7, 0f40800000; |
| ; CHECK-NEXT: div.full.ftz.f32 %r8, %r6, %r7; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r8; |
| ; CHECK-NEXT: ret; |
| %1 = call float @llvm.nvvm.div.full(float %a, float %b) |
| %2 = call float @llvm.nvvm.div.full(float %1, float 3.0) |
| %3 = call float @llvm.nvvm.div.full.ftz(float %2, float %b) |
| %4 = call float @llvm.nvvm.div.full.ftz(float %3, float 4.0) |
| ret float %4 |
| } |
| |
; div_fast_rr: an `fdiv afn` whose operands are both function arguments
; (register / register). The expected PTX is a single div.approx.f32.
| define float @div_fast_rr(float %a, float %b) { |
| ; CHECK-LABEL: div_fast_rr( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [div_fast_rr_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [div_fast_rr_param_1]; |
| ; CHECK-NEXT: div.approx.f32 %r3, %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %t1 = fdiv afn float %a, %b |
| ret float %t1 |
| } |
| |
; div_fast_rr_ftz: same `fdiv afn` register / register division as
; div_fast_rr, but the function carries attribute group #0
; ("denormal-fp-math-f32" = "preserve-sign"). The expected PTX uses the
; flush-to-zero form, div.approx.ftz.f32.
| define float @div_fast_rr_ftz(float %a, float %b) #0 { |
| ; CHECK-LABEL: div_fast_rr_ftz( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [div_fast_rr_ftz_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [div_fast_rr_ftz_param_1]; |
| ; CHECK-NEXT: div.approx.ftz.f32 %r3, %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %t1 = fdiv afn float %a, %b |
| ret float %t1 |
| } |
| |
; div_fast_ri: an `fdiv afn` of an argument by the constant 2.0
; (register / immediate). The expected PTX contains no divide at all: the
; operation appears as mul.rn.f32 by 0f3F000000, the hex encoding of 0.5.
; Parameter %b is declared but not used by the body.
| define float @div_fast_ri(float %a, float %b) { |
| ; CHECK-LABEL: div_fast_ri( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [div_fast_ri_param_0]; |
| ; CHECK-NEXT: mul.rn.f32 %r2, %r1, 0f3F000000; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %t1 = fdiv afn float %a, 2.0 |
| ret float %t1 |
| } |
| |
; div_fast_ri_ftz: same `fdiv afn` by the constant 2.0 as div_fast_ri, with
; attribute group #0 ("denormal-fp-math-f32" = "preserve-sign") applied.
; The expected PTX is mul.rn.ftz.f32 by 0f3F000000 (0.5) instead of a divide.
; Parameter %b is declared but not used by the body.
| define float @div_fast_ri_ftz(float %a, float %b) #0 { |
| ; CHECK-LABEL: div_fast_ri_ftz( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [div_fast_ri_ftz_param_0]; |
| ; CHECK-NEXT: mul.rn.ftz.f32 %r2, %r1, 0f3F000000; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %t1 = fdiv afn float %a, 2.0 |
| ret float %t1 |
| } |
| |
; rcp_fast: an `fdiv afn` with a constant 1.0 numerator (1.0 / %a).
; The expected PTX is a single rcp.approx.f32 rather than a divide.
| define float @rcp_fast(float %a) { |
| ; CHECK-LABEL: rcp_fast( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [rcp_fast_param_0]; |
| ; CHECK-NEXT: rcp.approx.f32 %r2, %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %t1 = fdiv afn float 1.0, %a |
| ret float %t1 |
| } |
| |
; rcp_fast_ftz: same 1.0 / %a `fdiv afn` as rcp_fast, with attribute group #0
; ("denormal-fp-math-f32" = "preserve-sign") applied. The expected PTX is
; the flush-to-zero reciprocal, rcp.approx.ftz.f32.
| define float @rcp_fast_ftz(float %a) #0 { |
| ; CHECK-LABEL: rcp_fast_ftz( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<3>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [rcp_fast_ftz_param_0]; |
| ; CHECK-NEXT: rcp.approx.ftz.f32 %r2, %r1; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-NEXT: ret; |
| %t1 = fdiv afn float 1.0, %a |
| ret float %t1 |
| } |
| |
; div_fast_vec: vector form of the fast division test. The body packs
; (%a, %b) and (%c, %d) into two <2 x float> values, divides them with
; `fdiv fast`, then extracts both lanes and adds them so the function can
; return a scalar. The expected PTX has no vector operation: it is two scalar
; div.approx.f32 instructions (%b / %d, then %a / %c) followed by add.rn.f32.
; Unlike the scalar tests in this file, this one uses the `fast` flag rather
; than `afn` alone.
| define float @div_fast_vec(float %a, float %b, float %c, float %d) { |
| ; CHECK-LABEL: div_fast_vec( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [div_fast_vec_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [div_fast_vec_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [div_fast_vec_param_2]; |
| ; CHECK-NEXT: ld.param.b32 %r4, [div_fast_vec_param_3]; |
| ; CHECK-NEXT: div.approx.f32 %r5, %r2, %r4; |
| ; CHECK-NEXT: div.approx.f32 %r6, %r1, %r3; |
| ; CHECK-NEXT: add.rn.f32 %r7, %r6, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r7; |
| ; CHECK-NEXT: ret; |
| %ins_a0 = insertelement <2 x float> poison, float %a, i32 0 |
| %ins_a1 = insertelement <2 x float> %ins_a0, float %b, i32 1 |
| %ins_b0 = insertelement <2 x float> poison, float %c, i32 0 |
| %ins_b1 = insertelement <2 x float> %ins_b0, float %d, i32 1 |
| %fdiv = fdiv fast <2 x float> %ins_a1, %ins_b1 |
| %ext0 = extractelement <2 x float> %fdiv, i32 0 |
| %ext1 = extractelement <2 x float> %fdiv, i32 1 |
| %fadd = fadd float %ext0, %ext1 |
| ret float %fadd |
| } |
| |
; div_fast_vec_ftz: same <2 x float> `fdiv fast` pattern as div_fast_vec
; (pack four scalars into two vectors, divide, extract both lanes, add),
; with attribute group #0 ("denormal-fp-math-f32" = "preserve-sign") applied.
; The expected PTX is two scalar div.approx.ftz.f32 instructions followed by
; add.rn.ftz.f32, so the .ftz modifier appears on the add as well as on the
; divides.
| define float @div_fast_vec_ftz(float %a, float %b, float %c, float %d) #0 { |
| ; CHECK-LABEL: div_fast_vec_ftz( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<8>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [div_fast_vec_ftz_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [div_fast_vec_ftz_param_1]; |
| ; CHECK-NEXT: ld.param.b32 %r3, [div_fast_vec_ftz_param_2]; |
| ; CHECK-NEXT: ld.param.b32 %r4, [div_fast_vec_ftz_param_3]; |
| ; CHECK-NEXT: div.approx.ftz.f32 %r5, %r2, %r4; |
| ; CHECK-NEXT: div.approx.ftz.f32 %r6, %r1, %r3; |
| ; CHECK-NEXT: add.rn.ftz.f32 %r7, %r6, %r5; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r7; |
| ; CHECK-NEXT: ret; |
| %ins_a0 = insertelement <2 x float> poison, float %a, i32 0 |
| %ins_a1 = insertelement <2 x float> %ins_a0, float %b, i32 1 |
| %ins_b0 = insertelement <2 x float> poison, float %c, i32 0 |
| %ins_b1 = insertelement <2 x float> %ins_b0, float %d, i32 1 |
| %fdiv = fdiv fast <2 x float> %ins_a1, %ins_b1 |
| %ext0 = extractelement <2 x float> %fdiv, i32 0 |
| %ext1 = extractelement <2 x float> %fdiv, i32 1 |
| %fadd = fadd float %ext0, %ext1 |
| ret float %fadd |
| } |
| |
; Attribute group #0 is applied to every *_ftz function in this file. It sets
; "denormal-fp-math-f32" to "preserve-sign"; the functions that carry it are
; the ones whose expected PTX uses the .ftz instruction variants.
| attributes #0 = { "denormal-fp-math-f32" = "preserve-sign" } |