; blob: 00fc3d2341a5e70075a0f307c653f22d26bab323
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 | %ptxas-verify %}
; Exercises the NVVM full-range division intrinsics back to back: the plain
; form first with a register divisor and then with the constant 3.0, followed
; by the .ftz form with a register divisor and then with the constant 4.0.
; The expected PTX keeps one div.full instruction per call and materializes
; each constant divisor with a mov.b32 beforehand. Note that this function
; carries no attribute group; the .ftz variants come from the intrinsic name.
define float @div_full(float %a, float %b) {
; CHECK-LABEL: div_full(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<9>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [div_full_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [div_full_param_1];
; CHECK-NEXT: div.full.f32 %r3, %r1, %r2;
; CHECK-NEXT: mov.b32 %r4, 0f40400000;
; CHECK-NEXT: div.full.f32 %r5, %r3, %r4;
; CHECK-NEXT: div.full.ftz.f32 %r6, %r5, %r2;
; CHECK-NEXT: mov.b32 %r7, 0f40800000;
; CHECK-NEXT: div.full.ftz.f32 %r8, %r6, %r7;
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
; CHECK-NEXT: ret;
  %q.reg = call float @llvm.nvvm.div.full(float %a, float %b)
  %q.imm = call float @llvm.nvvm.div.full(float %q.reg, float 3.0)
  %q.reg.ftz = call float @llvm.nvvm.div.full.ftz(float %q.imm, float %b)
  %q.imm.ftz = call float @llvm.nvvm.div.full.ftz(float %q.reg.ftz, float 4.0)
  ret float %q.imm.ftz
}
; Register / register division carrying the 'afn' fast-math flag.
; The expected PTX is a single div.approx.f32.
define float @div_fast_rr(float %a, float %b) {
; CHECK-LABEL: div_fast_rr(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [div_fast_rr_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [div_fast_rr_param_1];
; CHECK-NEXT: div.approx.f32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %quot = fdiv afn float %a, %b
  ret float %quot
}
; Same 'afn' register / register division as div_fast_rr, but the function is
; tagged with attribute group #0. The expected PTX switches to
; div.approx.ftz.f32.
define float @div_fast_rr_ftz(float %a, float %b) #0 {
; CHECK-LABEL: div_fast_rr_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [div_fast_rr_ftz_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [div_fast_rr_ftz_param_1];
; CHECK-NEXT: div.approx.ftz.f32 %r3, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %quot = fdiv afn float %a, %b
  ret float %quot
}
; 'afn' division of a register by the constant 2.0. The expected PTX contains
; no division at all: it is a mul.rn.f32 by 0f3F000000 (0.5).
; Parameter %b is declared but never used by the body.
define float @div_fast_ri(float %a, float %b) {
; CHECK-LABEL: div_fast_ri(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [div_fast_ri_param_0];
; CHECK-NEXT: mul.rn.f32 %r2, %r1, 0f3F000000;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %halved = fdiv afn float %a, 2.0
  ret float %halved
}
; 'afn' division of a register by the constant 2.0 in a function tagged with
; attribute group #0. The expected PTX is a mul.rn.ftz.f32 by 0f3F000000 (0.5)
; rather than a division.
; Parameter %b is declared but never used by the body.
define float @div_fast_ri_ftz(float %a, float %b) #0 {
; CHECK-LABEL: div_fast_ri_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [div_fast_ri_ftz_param_0];
; CHECK-NEXT: mul.rn.ftz.f32 %r2, %r1, 0f3F000000;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %halved = fdiv afn float %a, 2.0
  ret float %halved
}
; 'afn' division with a constant 1.0 numerator, i.e. a reciprocal.
; The expected PTX is a single rcp.approx.f32.
define float @rcp_fast(float %a) {
; CHECK-LABEL: rcp_fast(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [rcp_fast_param_0];
; CHECK-NEXT: rcp.approx.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %recip = fdiv afn float 1.0, %a
  ret float %recip
}
; Reciprocal written as an 'afn' division of 1.0 by %a, in a function tagged
; with attribute group #0. The expected PTX is a single rcp.approx.ftz.f32.
define float @rcp_fast_ftz(float %a) #0 {
; CHECK-LABEL: rcp_fast_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [rcp_fast_ftz_param_0];
; CHECK-NEXT: rcp.approx.ftz.f32 %r2, %r1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
  %recip = fdiv afn float 1.0, %a
  ret float %recip
}
; Two-lane vector division with the 'fast' flag. The numerator <%a, %b> and
; the denominator <%c, %d> are assembled from the scalar arguments, divided
; lane-wise, and the two quotient lanes are then summed into the scalar result.
; The expected PTX contains two scalar div.approx.f32 instructions (one per
; lane) followed by an add.rn.f32.
define float @div_fast_vec(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: div_fast_vec(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [div_fast_vec_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [div_fast_vec_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [div_fast_vec_param_2];
; CHECK-NEXT: ld.param.b32 %r4, [div_fast_vec_param_3];
; CHECK-NEXT: div.approx.f32 %r5, %r2, %r4;
; CHECK-NEXT: div.approx.f32 %r6, %r1, %r3;
; CHECK-NEXT: add.rn.f32 %r7, %r6, %r5;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
  ; Numerator vector: lane 0 = %a, lane 1 = %b.
  %num.lane0 = insertelement <2 x float> poison, float %a, i32 0
  %num = insertelement <2 x float> %num.lane0, float %b, i32 1
  ; Denominator vector: lane 0 = %c, lane 1 = %d.
  %den.lane0 = insertelement <2 x float> poison, float %c, i32 0
  %den = insertelement <2 x float> %den.lane0, float %d, i32 1
  %quot = fdiv fast <2 x float> %num, %den
  ; Reduce the two quotient lanes to one scalar.
  %quot.lo = extractelement <2 x float> %quot, i32 0
  %quot.hi = extractelement <2 x float> %quot, i32 1
  %sum = fadd float %quot.lo, %quot.hi
  ret float %sum
}
; Two-lane 'fast' vector division in a function tagged with attribute group
; #0. The numerator <%a, %b> and the denominator <%c, %d> are assembled from
; the scalar arguments, divided lane-wise, and the quotient lanes are summed.
; The expected PTX contains two scalar div.approx.ftz.f32 instructions
; followed by an add.rn.ftz.f32.
define float @div_fast_vec_ftz(float %a, float %b, float %c, float %d) #0 {
; CHECK-LABEL: div_fast_vec_ftz(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [div_fast_vec_ftz_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [div_fast_vec_ftz_param_1];
; CHECK-NEXT: ld.param.b32 %r3, [div_fast_vec_ftz_param_2];
; CHECK-NEXT: ld.param.b32 %r4, [div_fast_vec_ftz_param_3];
; CHECK-NEXT: div.approx.ftz.f32 %r5, %r2, %r4;
; CHECK-NEXT: div.approx.ftz.f32 %r6, %r1, %r3;
; CHECK-NEXT: add.rn.ftz.f32 %r7, %r6, %r5;
; CHECK-NEXT: st.param.b32 [func_retval0], %r7;
; CHECK-NEXT: ret;
  ; Numerator vector: lane 0 = %a, lane 1 = %b.
  %num.lane0 = insertelement <2 x float> poison, float %a, i32 0
  %num = insertelement <2 x float> %num.lane0, float %b, i32 1
  ; Denominator vector: lane 0 = %c, lane 1 = %d.
  %den.lane0 = insertelement <2 x float> poison, float %c, i32 0
  %den = insertelement <2 x float> %den.lane0, float %d, i32 1
  %quot = fdiv fast <2 x float> %num, %den
  ; Reduce the two quotient lanes to one scalar.
  %quot.lo = extractelement <2 x float> %quot, i32 0
  %quot.hi = extractelement <2 x float> %quot, i32 1
  %sum = fadd float %quot.lo, %quot.hi
  ret float %sum
}
; Attribute group #0 sets "denormal-fp-math-f32" to "preserve-sign". Every
; function visible here that carries #0 is one of the *_ftz variants, and the
; expected output for each of them uses the .ftz instruction forms.
attributes #0 = { "denormal-fp-math-f32" = "preserve-sign" }