| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx60 | FileCheck %s |
| ; RUN: %if ptxas-sm_20 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify -arch=sm_20 %} |
| |
| ; f32: every llvm.nvvm.add.<rnd>[.ftz].f call below receives a negated second |
| ; operand, and the expected PTX is a single sub.<rnd>[.ftz].f32 with no separate |
| ; negate instruction. Each result is negated and chained into the next call so |
| ; that all subtractions stay live. Covers rn, rz, rm and rp, each with and |
| ; without ftz, mirroring the rounding modes exercised by the f64 test. |
| ; NOTE(review): the rp / rp.ftz assertions were extended by hand following the |
| ; pattern of the generated ones; re-run utils/update_llc_test_checks.py to confirm. |
| define float @sub_f32(float %a, float %b) { |
| ; CHECK-LABEL: sub_f32( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<11>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [sub_f32_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [sub_f32_param_1]; |
| ; CHECK-NEXT: sub.rn.f32 %r3, %r1, %r2; |
| ; CHECK-NEXT: sub.rn.ftz.f32 %r4, %r1, %r3; |
| ; CHECK-NEXT: sub.rz.f32 %r5, %r1, %r4; |
| ; CHECK-NEXT: sub.rz.ftz.f32 %r6, %r1, %r5; |
| ; CHECK-NEXT: sub.rm.f32 %r7, %r1, %r6; |
| ; CHECK-NEXT: sub.rm.ftz.f32 %r8, %r1, %r7; |
| ; CHECK-NEXT: sub.rp.f32 %r9, %r1, %r8; |
| ; CHECK-NEXT: sub.rp.ftz.f32 %r10, %r1, %r9; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r10; |
| ; CHECK-NEXT: ret; |
| ; rn: a + (-b) |
| %f0 = fneg float %b |
| %r1 = call float @llvm.nvvm.add.rn.f(float %a, float %f0) |
| |
| ; rn.ftz |
| %f1 = fneg float %r1 |
| %r2 = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %f1) |
| |
| ; rz |
| %f2 = fneg float %r2 |
| %r3 = call float @llvm.nvvm.add.rz.f(float %a, float %f2) |
| |
| ; rz.ftz |
| %f3 = fneg float %r3 |
| %r4 = call float @llvm.nvvm.add.rz.ftz.f(float %a, float %f3) |
| |
| ; rm |
| %f4 = fneg float %r4 |
| %r5 = call float @llvm.nvvm.add.rm.f(float %a, float %f4) |
| |
| ; rm.ftz |
| %f5 = fneg float %r5 |
| %r6 = call float @llvm.nvvm.add.rm.ftz.f(float %a, float %f5) |
| |
| ; rp (previously untested for f32, although the f64 test covers it) |
| %f6 = fneg float %r6 |
| %r7 = call float @llvm.nvvm.add.rp.f(float %a, float %f6) |
| |
| ; rp.ftz |
| %f7 = fneg float %r7 |
| %r8 = call float @llvm.nvvm.add.rp.ftz.f(float %a, float %f7) |
| |
| ret float %r8 |
| } |
| |
| ; f64: a negated second operand passed to llvm.nvvm.add.<rnd>.d is expected to |
| ; come out as a single sub.<rnd>.f64 for each of rn, rz, rm and rp. Each |
| ; difference is negated and used as the subtrahend of the next step, so none of |
| ; the operations is dead. |
| define double @sub_f64(double %a, double %b) { |
| ; CHECK-LABEL: sub_f64( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<7>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [sub_f64_param_0]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [sub_f64_param_1]; |
| ; CHECK-NEXT: sub.rn.f64 %rd3, %rd1, %rd2; |
| ; CHECK-NEXT: sub.rz.f64 %rd4, %rd1, %rd3; |
| ; CHECK-NEXT: sub.rm.f64 %rd5, %rd1, %rd4; |
| ; CHECK-NEXT: sub.rp.f64 %rd6, %rd1, %rd5; |
| ; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; |
| ; CHECK-NEXT: ret; |
| ; rn: a + (-b) |
| %neg.b = fneg double %b |
| %sub.rn = call double @llvm.nvvm.add.rn.d(double %a, double %neg.b) |
| ; rz: a + (-sub.rn) |
| %neg.rn = fneg double %sub.rn |
| %sub.rz = call double @llvm.nvvm.add.rz.d(double %a, double %neg.rn) |
| ; rm: a + (-sub.rz) |
| %neg.rz = fneg double %sub.rz |
| %sub.rm = call double @llvm.nvvm.add.rm.d(double %a, double %neg.rz) |
| ; rp: a + (-sub.rm) |
| %neg.rm = fneg double %sub.rm |
| %sub.rp = call double @llvm.nvvm.add.rp.d(double %a, double %neg.rm) |
| ret double %sub.rp |
| } |