| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s --enable-unsafe-fp-math -mcpu=sm_60 | FileCheck %s --check-prefixes=FAST |
| ; RUN: llc < %s -mcpu=sm_60 | FileCheck %s --check-prefixes=NORMAL |
| |
| |
| target triple = "nvptx64-unknown-cuda" |
| |
| define half @frem_f16(half %a, half %b) { |
| ; Lowering of frem on half with two variable operands. |
| ; Both check groups widen the operands to f32 (cvt.f32.f16), form the |
| ; remainder as fma(-trunc(a / b), b, a), and narrow back with cvt.rn.f16.f32. |
| ; The unsafe-fp-math run divides with div.approx.f32; the default run uses |
| ; div.rn.f32 and adds testp.infinite + selp so an infinite %b selects %a. |
| ; FAST-LABEL: frem_f16( |
| ; FAST: { |
| ; FAST-NEXT: .reg .b16 %rs<4>; |
| ; FAST-NEXT: .reg .b32 %r<7>; |
| ; FAST-EMPTY: |
| ; FAST-NEXT: // %bb.0: |
| ; FAST-NEXT: ld.param.b16 %rs1, [frem_f16_param_0]; |
| ; FAST-NEXT: ld.param.b16 %rs2, [frem_f16_param_1]; |
| ; FAST-NEXT: cvt.f32.f16 %r1, %rs2; |
| ; FAST-NEXT: cvt.f32.f16 %r2, %rs1; |
| ; FAST-NEXT: div.approx.f32 %r3, %r2, %r1; |
| ; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; FAST-NEXT: neg.f32 %r5, %r4; |
| ; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; |
| ; FAST-NEXT: cvt.rn.f16.f32 %rs3, %r6; |
| ; FAST-NEXT: st.param.b16 [func_retval0], %rs3; |
| ; FAST-NEXT: ret; |
| ; |
| ; NORMAL-LABEL: frem_f16( |
| ; NORMAL: { |
| ; NORMAL-NEXT: .reg .pred %p<2>; |
| ; NORMAL-NEXT: .reg .b16 %rs<4>; |
| ; NORMAL-NEXT: .reg .b32 %r<8>; |
| ; NORMAL-EMPTY: |
| ; NORMAL-NEXT: // %bb.0: |
| ; NORMAL-NEXT: ld.param.b16 %rs1, [frem_f16_param_0]; |
| ; NORMAL-NEXT: ld.param.b16 %rs2, [frem_f16_param_1]; |
| ; NORMAL-NEXT: cvt.f32.f16 %r1, %rs2; |
| ; NORMAL-NEXT: cvt.f32.f16 %r2, %rs1; |
| ; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1; |
| ; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; NORMAL-NEXT: neg.f32 %r5, %r4; |
| ; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; |
| ; NORMAL-NEXT: testp.infinite.f32 %p1, %r1; |
| ; NORMAL-NEXT: selp.f32 %r7, %r2, %r6, %p1; |
| ; NORMAL-NEXT: cvt.rn.f16.f32 %rs3, %r7; |
| ; NORMAL-NEXT: st.param.b16 [func_retval0], %rs3; |
| ; NORMAL-NEXT: ret; |
| %r = frem half %a, %b |
| ret half %r |
| } |
| |
| define float @frem_f32(float %a, float %b) { |
| ; Lowering of frem on float with two variable operands. |
| ; Both check groups compute fma(-trunc(a / b), b, a) directly in f32. |
| ; The unsafe-fp-math run divides with div.approx.f32; the default run uses |
| ; div.rn.f32 and adds testp.infinite + selp so an infinite %b selects %a. |
| ; FAST-LABEL: frem_f32( |
| ; FAST: { |
| ; FAST-NEXT: .reg .b32 %r<7>; |
| ; FAST-EMPTY: |
| ; FAST-NEXT: // %bb.0: |
| ; FAST-NEXT: ld.param.b32 %r1, [frem_f32_param_0]; |
| ; FAST-NEXT: ld.param.b32 %r2, [frem_f32_param_1]; |
| ; FAST-NEXT: div.approx.f32 %r3, %r1, %r2; |
| ; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; FAST-NEXT: neg.f32 %r5, %r4; |
| ; FAST-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; |
| ; FAST-NEXT: st.param.b32 [func_retval0], %r6; |
| ; FAST-NEXT: ret; |
| ; |
| ; NORMAL-LABEL: frem_f32( |
| ; NORMAL: { |
| ; NORMAL-NEXT: .reg .pred %p<2>; |
| ; NORMAL-NEXT: .reg .b32 %r<8>; |
| ; NORMAL-EMPTY: |
| ; NORMAL-NEXT: // %bb.0: |
| ; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_param_0]; |
| ; NORMAL-NEXT: ld.param.b32 %r2, [frem_f32_param_1]; |
| ; NORMAL-NEXT: div.rn.f32 %r3, %r1, %r2; |
| ; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; NORMAL-NEXT: neg.f32 %r5, %r4; |
| ; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; |
| ; NORMAL-NEXT: testp.infinite.f32 %p1, %r2; |
| ; NORMAL-NEXT: selp.f32 %r7, %r1, %r6, %p1; |
| ; NORMAL-NEXT: st.param.b32 [func_retval0], %r7; |
| ; NORMAL-NEXT: ret; |
| %r = frem float %a, %b |
| ret float %r |
| } |
| |
| define double @frem_f64(double %a, double %b) { |
| ; Lowering of frem on double with two variable operands. |
| ; Both check groups divide with div.rn.f64 (no approximate divide appears |
| ; for f64 even in the unsafe-fp-math run) and compute fma(-trunc(a / b), b, a). |
| ; Only the default run adds testp.infinite + selp so an infinite %b selects %a. |
| ; FAST-LABEL: frem_f64( |
| ; FAST: { |
| ; FAST-NEXT: .reg .b64 %rd<7>; |
| ; FAST-EMPTY: |
| ; FAST-NEXT: // %bb.0: |
| ; FAST-NEXT: ld.param.b64 %rd1, [frem_f64_param_0]; |
| ; FAST-NEXT: ld.param.b64 %rd2, [frem_f64_param_1]; |
| ; FAST-NEXT: div.rn.f64 %rd3, %rd1, %rd2; |
| ; FAST-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; |
| ; FAST-NEXT: neg.f64 %rd5, %rd4; |
| ; FAST-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; |
| ; FAST-NEXT: st.param.b64 [func_retval0], %rd6; |
| ; FAST-NEXT: ret; |
| ; |
| ; NORMAL-LABEL: frem_f64( |
| ; NORMAL: { |
| ; NORMAL-NEXT: .reg .pred %p<2>; |
| ; NORMAL-NEXT: .reg .b64 %rd<8>; |
| ; NORMAL-EMPTY: |
| ; NORMAL-NEXT: // %bb.0: |
| ; NORMAL-NEXT: ld.param.b64 %rd1, [frem_f64_param_0]; |
| ; NORMAL-NEXT: ld.param.b64 %rd2, [frem_f64_param_1]; |
| ; NORMAL-NEXT: div.rn.f64 %rd3, %rd1, %rd2; |
| ; NORMAL-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; |
| ; NORMAL-NEXT: neg.f64 %rd5, %rd4; |
| ; NORMAL-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; |
| ; NORMAL-NEXT: testp.infinite.f64 %p1, %rd2; |
| ; NORMAL-NEXT: selp.f64 %rd7, %rd1, %rd6, %p1; |
| ; NORMAL-NEXT: st.param.b64 [func_retval0], %rd7; |
| ; NORMAL-NEXT: ret; |
| %r = frem double %a, %b |
| ret double %r |
| } |
| |
| define half @frem_f16_ninf(half %a, half %b) { |
| ; Same as frem_f16 but the frem carries the ninf fast-math flag. |
| ; With ninf, the default-run checks declare no predicate register and contain |
| ; no testp.infinite/selp: the widened fma result is narrowed and returned |
| ; directly. The two runs then differ only in div.approx.f32 vs div.rn.f32. |
| ; FAST-LABEL: frem_f16_ninf( |
| ; FAST: { |
| ; FAST-NEXT: .reg .b16 %rs<4>; |
| ; FAST-NEXT: .reg .b32 %r<7>; |
| ; FAST-EMPTY: |
| ; FAST-NEXT: // %bb.0: |
| ; FAST-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0]; |
| ; FAST-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1]; |
| ; FAST-NEXT: cvt.f32.f16 %r1, %rs2; |
| ; FAST-NEXT: cvt.f32.f16 %r2, %rs1; |
| ; FAST-NEXT: div.approx.f32 %r3, %r2, %r1; |
| ; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; FAST-NEXT: neg.f32 %r5, %r4; |
| ; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; |
| ; FAST-NEXT: cvt.rn.f16.f32 %rs3, %r6; |
| ; FAST-NEXT: st.param.b16 [func_retval0], %rs3; |
| ; FAST-NEXT: ret; |
| ; |
| ; NORMAL-LABEL: frem_f16_ninf( |
| ; NORMAL: { |
| ; NORMAL-NEXT: .reg .b16 %rs<4>; |
| ; NORMAL-NEXT: .reg .b32 %r<7>; |
| ; NORMAL-EMPTY: |
| ; NORMAL-NEXT: // %bb.0: |
| ; NORMAL-NEXT: ld.param.b16 %rs1, [frem_f16_ninf_param_0]; |
| ; NORMAL-NEXT: ld.param.b16 %rs2, [frem_f16_ninf_param_1]; |
| ; NORMAL-NEXT: cvt.f32.f16 %r1, %rs2; |
| ; NORMAL-NEXT: cvt.f32.f16 %r2, %rs1; |
| ; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1; |
| ; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; NORMAL-NEXT: neg.f32 %r5, %r4; |
| ; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, %r2; |
| ; NORMAL-NEXT: cvt.rn.f16.f32 %rs3, %r6; |
| ; NORMAL-NEXT: st.param.b16 [func_retval0], %rs3; |
| ; NORMAL-NEXT: ret; |
| %r = frem ninf half %a, %b |
| ret half %r |
| } |
| |
| define float @frem_f32_ninf(float %a, float %b) { |
| ; Same as frem_f32 but the frem carries the ninf fast-math flag. |
| ; With ninf, the default-run checks contain no testp.infinite/selp and store |
| ; the fma result directly; the two runs differ only in div.approx.f32 vs |
| ; div.rn.f32. |
| ; FAST-LABEL: frem_f32_ninf( |
| ; FAST: { |
| ; FAST-NEXT: .reg .b32 %r<7>; |
| ; FAST-EMPTY: |
| ; FAST-NEXT: // %bb.0: |
| ; FAST-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0]; |
| ; FAST-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1]; |
| ; FAST-NEXT: div.approx.f32 %r3, %r1, %r2; |
| ; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; FAST-NEXT: neg.f32 %r5, %r4; |
| ; FAST-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; |
| ; FAST-NEXT: st.param.b32 [func_retval0], %r6; |
| ; FAST-NEXT: ret; |
| ; |
| ; NORMAL-LABEL: frem_f32_ninf( |
| ; NORMAL: { |
| ; NORMAL-NEXT: .reg .b32 %r<7>; |
| ; NORMAL-EMPTY: |
| ; NORMAL-NEXT: // %bb.0: |
| ; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_ninf_param_0]; |
| ; NORMAL-NEXT: ld.param.b32 %r2, [frem_f32_ninf_param_1]; |
| ; NORMAL-NEXT: div.rn.f32 %r3, %r1, %r2; |
| ; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; NORMAL-NEXT: neg.f32 %r5, %r4; |
| ; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r2, %r1; |
| ; NORMAL-NEXT: st.param.b32 [func_retval0], %r6; |
| ; NORMAL-NEXT: ret; |
| %r = frem ninf float %a, %b |
| ret float %r |
| } |
| |
| define double @frem_f64_ninf(double %a, double %b) { |
| ; Same as frem_f64 but the frem carries the ninf fast-math flag. |
| ; With ninf, the default-run checks contain no testp.infinite/selp, so both |
| ; check groups expect the same instruction sequence: div.rn.f64, cvt.rzi, |
| ; neg, fma.rn.f64, then the store of the fma result. |
| ; FAST-LABEL: frem_f64_ninf( |
| ; FAST: { |
| ; FAST-NEXT: .reg .b64 %rd<7>; |
| ; FAST-EMPTY: |
| ; FAST-NEXT: // %bb.0: |
| ; FAST-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0]; |
| ; FAST-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1]; |
| ; FAST-NEXT: div.rn.f64 %rd3, %rd1, %rd2; |
| ; FAST-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; |
| ; FAST-NEXT: neg.f64 %rd5, %rd4; |
| ; FAST-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; |
| ; FAST-NEXT: st.param.b64 [func_retval0], %rd6; |
| ; FAST-NEXT: ret; |
| ; |
| ; NORMAL-LABEL: frem_f64_ninf( |
| ; NORMAL: { |
| ; NORMAL-NEXT: .reg .b64 %rd<7>; |
| ; NORMAL-EMPTY: |
| ; NORMAL-NEXT: // %bb.0: |
| ; NORMAL-NEXT: ld.param.b64 %rd1, [frem_f64_ninf_param_0]; |
| ; NORMAL-NEXT: ld.param.b64 %rd2, [frem_f64_ninf_param_1]; |
| ; NORMAL-NEXT: div.rn.f64 %rd3, %rd1, %rd2; |
| ; NORMAL-NEXT: cvt.rzi.f64.f64 %rd4, %rd3; |
| ; NORMAL-NEXT: neg.f64 %rd5, %rd4; |
| ; NORMAL-NEXT: fma.rn.f64 %rd6, %rd5, %rd2, %rd1; |
| ; NORMAL-NEXT: st.param.b64 [func_retval0], %rd6; |
| ; NORMAL-NEXT: ret; |
| %r = frem ninf double %a, %b |
| ret double %r |
| } |
| |
| define float @frem_f32_imm1(float %a) { |
| ; frem of a variable dividend by the constant divisor 7.0. |
| ; Neither check group has a neg or a testp.infinite/selp: the fma multiplies |
| ; trunc(a / 7) by the immediate 0fC0E00000 (-7.0) instead of negating. |
| ; The unsafe-fp-math run replaces the divide with mul.f32 by 0f3E124925 |
| ; (about 1/7); the default run keeps div.rn.f32 by 0f40E00000 (7.0). |
| ; FAST-LABEL: frem_f32_imm1( |
| ; FAST: { |
| ; FAST-NEXT: .reg .b32 %r<5>; |
| ; FAST-EMPTY: |
| ; FAST-NEXT: // %bb.0: |
| ; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm1_param_0]; |
| ; FAST-NEXT: mul.f32 %r2, %r1, 0f3E124925; |
| ; FAST-NEXT: cvt.rzi.f32.f32 %r3, %r2; |
| ; FAST-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1; |
| ; FAST-NEXT: st.param.b32 [func_retval0], %r4; |
| ; FAST-NEXT: ret; |
| ; |
| ; NORMAL-LABEL: frem_f32_imm1( |
| ; NORMAL: { |
| ; NORMAL-NEXT: .reg .b32 %r<5>; |
| ; NORMAL-EMPTY: |
| ; NORMAL-NEXT: // %bb.0: |
| ; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm1_param_0]; |
| ; NORMAL-NEXT: div.rn.f32 %r2, %r1, 0f40E00000; |
| ; NORMAL-NEXT: cvt.rzi.f32.f32 %r3, %r2; |
| ; NORMAL-NEXT: fma.rn.f32 %r4, %r3, 0fC0E00000, %r1; |
| ; NORMAL-NEXT: st.param.b32 [func_retval0], %r4; |
| ; NORMAL-NEXT: ret; |
| %r = frem float %a, 7.0 |
| ret float %r |
| } |
| |
| define float @frem_f32_imm2(float %a) { |
| ; frem of the constant dividend 7.0 by a variable divisor. |
| ; 0f40E00000 (7.0) is moved into a register for the divide and used as an |
| ; immediate in the fma (and, in the default run, in the selp). |
| ; Because the divisor is variable, the default run still expects |
| ; testp.infinite on the loaded %a with selp picking 7.0 when it is infinite; |
| ; the unsafe-fp-math run uses div.approx.f32 and has no such select. |
| ; FAST-LABEL: frem_f32_imm2( |
| ; FAST: { |
| ; FAST-NEXT: .reg .b32 %r<7>; |
| ; FAST-EMPTY: |
| ; FAST-NEXT: // %bb.0: |
| ; FAST-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0]; |
| ; FAST-NEXT: mov.b32 %r2, 0f40E00000; |
| ; FAST-NEXT: div.approx.f32 %r3, %r2, %r1; |
| ; FAST-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; FAST-NEXT: neg.f32 %r5, %r4; |
| ; FAST-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000; |
| ; FAST-NEXT: st.param.b32 [func_retval0], %r6; |
| ; FAST-NEXT: ret; |
| ; |
| ; NORMAL-LABEL: frem_f32_imm2( |
| ; NORMAL: { |
| ; NORMAL-NEXT: .reg .pred %p<2>; |
| ; NORMAL-NEXT: .reg .b32 %r<8>; |
| ; NORMAL-EMPTY: |
| ; NORMAL-NEXT: // %bb.0: |
| ; NORMAL-NEXT: ld.param.b32 %r1, [frem_f32_imm2_param_0]; |
| ; NORMAL-NEXT: mov.b32 %r2, 0f40E00000; |
| ; NORMAL-NEXT: div.rn.f32 %r3, %r2, %r1; |
| ; NORMAL-NEXT: cvt.rzi.f32.f32 %r4, %r3; |
| ; NORMAL-NEXT: neg.f32 %r5, %r4; |
| ; NORMAL-NEXT: fma.rn.f32 %r6, %r5, %r1, 0f40E00000; |
| ; NORMAL-NEXT: testp.infinite.f32 %p1, %r1; |
| ; NORMAL-NEXT: selp.f32 %r7, 0f40E00000, %r6, %p1; |
| ; NORMAL-NEXT: st.param.b32 [func_retval0], %r7; |
| ; NORMAL-NEXT: ret; |
| %r = frem float 7.0, %a |
| ret float %r |
| } |