| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -verify-machineinstrs -nvptx-prec-divf32=0 | FileCheck %s --check-prefix=APPROX |
| ; RUN: llc < %s -verify-machineinstrs -nvptx-prec-divf32=1 | FileCheck %s --check-prefix=FULL |
| ; RUN: llc < %s -verify-machineinstrs -nvptx-prec-divf32=2 | FileCheck %s --check-prefixes=IEEE,FTZ |
| ; RUN: llc < %s -verify-machineinstrs -nvptx-prec-divf32=3 | FileCheck %s --check-prefixes=IEEE,NOFTZ |
| |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; f32 fdiv in a function that carries "denormal-fp-math-f32"="preserve-sign". |
| ; The assertions below pin the PTX division instruction expected for each |
| ; -nvptx-prec-divf32 value used by the lit invocations at the top of the file |
| ;   0 -> div.approx.ftz.f32 |
| ;   1 -> div.full.ftz.f32 |
| ;   2 -> div.rn.ftz.f32 |
| ;   3 -> div.rn.f32   (no .ftz suffix, even though the attribute is present) |
| ; Values 2 and 3 differ for this function, so they are verified through their |
| ; separate prefixes rather than the prefix the two invocations have in common. |
| define float @div_ftz(float %a, float %b) "denormal-fp-math-f32" = "preserve-sign" { |
| ; APPROX-LABEL: div_ftz( |
| ; APPROX: { |
| ; APPROX-NEXT: .reg .b32 %r<4>; |
| ; APPROX-EMPTY: |
| ; APPROX-NEXT: // %bb.0: |
| ; APPROX-NEXT: ld.param.b32 %r1, [div_ftz_param_0]; |
| ; APPROX-NEXT: ld.param.b32 %r2, [div_ftz_param_1]; |
| ; APPROX-NEXT: div.approx.ftz.f32 %r3, %r1, %r2; |
| ; APPROX-NEXT: st.param.b32 [func_retval0], %r3; |
| ; APPROX-NEXT: ret; |
| ; |
| ; FULL-LABEL: div_ftz( |
| ; FULL: { |
| ; FULL-NEXT: .reg .b32 %r<4>; |
| ; FULL-EMPTY: |
| ; FULL-NEXT: // %bb.0: |
| ; FULL-NEXT: ld.param.b32 %r1, [div_ftz_param_0]; |
| ; FULL-NEXT: ld.param.b32 %r2, [div_ftz_param_1]; |
| ; FULL-NEXT: div.full.ftz.f32 %r3, %r1, %r2; |
| ; FULL-NEXT: st.param.b32 [func_retval0], %r3; |
| ; FULL-NEXT: ret; |
| ; |
| ; FTZ-LABEL: div_ftz( |
| ; FTZ: { |
| ; FTZ-NEXT: .reg .b32 %r<4>; |
| ; FTZ-EMPTY: |
| ; FTZ-NEXT: // %bb.0: |
| ; FTZ-NEXT: ld.param.b32 %r1, [div_ftz_param_0]; |
| ; FTZ-NEXT: ld.param.b32 %r2, [div_ftz_param_1]; |
| ; FTZ-NEXT: div.rn.ftz.f32 %r3, %r1, %r2; |
| ; FTZ-NEXT: st.param.b32 [func_retval0], %r3; |
| ; FTZ-NEXT: ret; |
| ; |
| ; NOFTZ-LABEL: div_ftz( |
| ; NOFTZ: { |
| ; NOFTZ-NEXT: .reg .b32 %r<4>; |
| ; NOFTZ-EMPTY: |
| ; NOFTZ-NEXT: // %bb.0: |
| ; NOFTZ-NEXT: ld.param.b32 %r1, [div_ftz_param_0]; |
| ; NOFTZ-NEXT: ld.param.b32 %r2, [div_ftz_param_1]; |
| ; NOFTZ-NEXT: div.rn.f32 %r3, %r1, %r2; |
| ; NOFTZ-NEXT: st.param.b32 [func_retval0], %r3; |
| ; NOFTZ-NEXT: ret; |
| %val = fdiv float %a, %b |
| ret float %val |
| } |
| |
| |
| ; f32 fdiv in a function with no denormal-mode attribute. |
| ; The assertions below pin the PTX division instruction expected for each |
| ; -nvptx-prec-divf32 value used by the lit invocations at the top of the file |
| ;   0 -> div.approx.f32 |
| ;   1 -> div.full.f32 |
| ;   2 -> div.rn.f32 |
| ;   3 -> div.rn.f32 |
| ; Values 2 and 3 give identical output for this function, so both invocations |
| ; are verified through the single prefix they have in common. |
| define float @div(float %a, float %b) { |
| ; APPROX-LABEL: div( |
| ; APPROX: { |
| ; APPROX-NEXT: .reg .b32 %r<4>; |
| ; APPROX-EMPTY: |
| ; APPROX-NEXT: // %bb.0: |
| ; APPROX-NEXT: ld.param.b32 %r1, [div_param_0]; |
| ; APPROX-NEXT: ld.param.b32 %r2, [div_param_1]; |
| ; APPROX-NEXT: div.approx.f32 %r3, %r1, %r2; |
| ; APPROX-NEXT: st.param.b32 [func_retval0], %r3; |
| ; APPROX-NEXT: ret; |
| ; |
| ; FULL-LABEL: div( |
| ; FULL: { |
| ; FULL-NEXT: .reg .b32 %r<4>; |
| ; FULL-EMPTY: |
| ; FULL-NEXT: // %bb.0: |
| ; FULL-NEXT: ld.param.b32 %r1, [div_param_0]; |
| ; FULL-NEXT: ld.param.b32 %r2, [div_param_1]; |
| ; FULL-NEXT: div.full.f32 %r3, %r1, %r2; |
| ; FULL-NEXT: st.param.b32 [func_retval0], %r3; |
| ; FULL-NEXT: ret; |
| ; |
| ; IEEE-LABEL: div( |
| ; IEEE: { |
| ; IEEE-NEXT: .reg .b32 %r<4>; |
| ; IEEE-EMPTY: |
| ; IEEE-NEXT: // %bb.0: |
| ; IEEE-NEXT: ld.param.b32 %r1, [div_param_0]; |
| ; IEEE-NEXT: ld.param.b32 %r2, [div_param_1]; |
| ; IEEE-NEXT: div.rn.f32 %r3, %r1, %r2; |
| ; IEEE-NEXT: st.param.b32 [func_retval0], %r3; |
| ; IEEE-NEXT: ret; |
| %val = fdiv float %a, %b |
| ret float %val |
| } |