| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | FileCheck %s |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx60 | FileCheck %s |
| ; RUN: %if ptxas-isa-4.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | %ptxas-verify%} |
| ; RUN: %if ptxas-isa-6.0 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx60 | %ptxas-verify%} |
| |
| ; Scalar half case: %b is negated with fneg and the result is passed as the |
| ; second operand of llvm.nvvm.add.rn.sat.f16. The assertions below expect the |
| ; pair to be emitted as one sub.rn.sat.f16 with no separate negate. |
| define half @sub_rn_sat_f16(half %a, half %b) { |
| ; CHECK-LABEL: sub_rn_sat_f16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b16 %rs1, [sub_rn_sat_f16_param_0]; |
| ; CHECK-NEXT: ld.param.b16 %rs2, [sub_rn_sat_f16_param_1]; |
| ; CHECK-NEXT: sub.rn.sat.f16 %rs3, %rs1, %rs2; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; |
| ; CHECK-NEXT: ret; |
| %neg_b = fneg half %b |
| %diff = call half @llvm.nvvm.add.rn.sat.f16(half %a, half %neg_b) |
| ret half %diff |
| } |
| |
| ; <2 x half> case: %b is negated with fneg and the result is passed as the |
| ; second operand of llvm.nvvm.add.rn.sat.v2f16. The assertions below expect |
| ; the pair to be emitted as one sub.rn.sat.f16x2 on 32-bit registers. |
| define <2 x half> @sub_rn_sat_f16x2(<2 x half> %a, <2 x half> %b) { |
| ; CHECK-LABEL: sub_rn_sat_f16x2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [sub_rn_sat_f16x2_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [sub_rn_sat_f16x2_param_1]; |
| ; CHECK-NEXT: sub.rn.sat.f16x2 %r3, %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %neg_b = fneg <2 x half> %b |
| %diff = call <2 x half> @llvm.nvvm.add.rn.sat.v2f16(<2 x half> %a, <2 x half> %neg_b) |
| ret <2 x half> %diff |
| } |
| |
| ; Scalar half case using the .ftz intrinsic: %b is negated with fneg and the |
| ; result is passed as the second operand of llvm.nvvm.add.rn.ftz.sat.f16. The |
| ; assertions below expect one sub.rn.ftz.sat.f16 with no separate negate. |
| define half @sub_rn_ftz_sat_f16(half %a, half %b) { |
| ; CHECK-LABEL: sub_rn_ftz_sat_f16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b16 %rs1, [sub_rn_ftz_sat_f16_param_0]; |
| ; CHECK-NEXT: ld.param.b16 %rs2, [sub_rn_ftz_sat_f16_param_1]; |
| ; CHECK-NEXT: sub.rn.ftz.sat.f16 %rs3, %rs1, %rs2; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; |
| ; CHECK-NEXT: ret; |
| %neg_b = fneg half %b |
| %diff = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %a, half %neg_b) |
| ret half %diff |
| } |
| |
| ; <2 x half> case using the .ftz intrinsic: %b is negated with fneg and the |
| ; result is passed as the second operand of llvm.nvvm.add.rn.ftz.sat.v2f16. |
| ; The assertions below expect one sub.rn.ftz.sat.f16x2 on 32-bit registers. |
| define <2 x half> @sub_rn_ftz_sat_f16x2(<2 x half> %a, <2 x half> %b) { |
| ; CHECK-LABEL: sub_rn_ftz_sat_f16x2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [sub_rn_ftz_sat_f16x2_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [sub_rn_ftz_sat_f16x2_param_1]; |
| ; CHECK-NEXT: sub.rn.ftz.sat.f16x2 %r3, %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %neg_b = fneg <2 x half> %b |
| %diff = call <2 x half> @llvm.nvvm.add.rn.ftz.sat.v2f16(<2 x half> %a, <2 x half> %neg_b) |
| ret <2 x half> %diff |
| } |