| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | FileCheck %s |
| ; RUN: %if ptxas-isa-4.2 %{ llc < %s -mtriple=nvptx64 -mcpu=sm_53 -mattr=+ptx42 | %ptxas-verify%} |
| |
| ; Scalar half case: the call to @llvm.nvvm.add.rn.sat.f16 below is expected to |
| ; lower to exactly one add.rn.sat.f16 on 16-bit registers. Both operands are |
| ; read with ld.param.b16 and the result is written to func_retval0. |
| ; The assertions in the body are tool-generated; regenerate them rather than |
| ; editing by hand. |
| define half @add_rn_sat_f16(half %a, half %b) { |
| ; CHECK-LABEL: add_rn_sat_f16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b16 %rs1, [add_rn_sat_f16_param_0]; |
| ; CHECK-NEXT: ld.param.b16 %rs2, [add_rn_sat_f16_param_1]; |
| ; CHECK-NEXT: add.rn.sat.f16 %rs3, %rs1, %rs2; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; |
| ; CHECK-NEXT: ret; |
| %1 = call half @llvm.nvvm.add.rn.sat.f16(half %a, half %b) |
| ret half %1 |
| } |
| |
| ; Packed case: the <2 x half> operands of @llvm.nvvm.add.rn.sat.v2f16 are each |
| ; loaded as a single 32-bit value (ld.param.b32), and the call is expected to |
| ; lower to exactly one add.rn.sat.f16x2 on those 32-bit registers. |
| ; The assertions in the body are tool-generated; regenerate them rather than |
| ; editing by hand. |
| define <2 x half> @add_rn_sat_f16x2(<2 x half> %a, <2 x half> %b) { |
| ; CHECK-LABEL: add_rn_sat_f16x2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [add_rn_sat_f16x2_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [add_rn_sat_f16x2_param_1]; |
| ; CHECK-NEXT: add.rn.sat.f16x2 %r3, %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %1 = call <2 x half> @llvm.nvvm.add.rn.sat.v2f16(<2 x half> %a, <2 x half> %b) |
| ret <2 x half> %1 |
| } |
| |
| ; Scalar half case with ftz: the call to @llvm.nvvm.add.rn.ftz.sat.f16 is |
| ; expected to lower to exactly one add.rn.ftz.sat.f16 on 16-bit registers, |
| ; i.e. the .ftz modifier in the intrinsic name is carried into the emitted |
| ; instruction alongside .rn and .sat. |
| ; The assertions in the body are tool-generated; regenerate them rather than |
| ; editing by hand. |
| define half @add_rn_ftz_sat_f16(half %a, half %b) { |
| ; CHECK-LABEL: add_rn_ftz_sat_f16( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b16 %rs<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b16 %rs1, [add_rn_ftz_sat_f16_param_0]; |
| ; CHECK-NEXT: ld.param.b16 %rs2, [add_rn_ftz_sat_f16_param_1]; |
| ; CHECK-NEXT: add.rn.ftz.sat.f16 %rs3, %rs1, %rs2; |
| ; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; |
| ; CHECK-NEXT: ret; |
| %1 = call half @llvm.nvvm.add.rn.ftz.sat.f16(half %a, half %b) |
| ret half %1 |
| } |
| |
| ; Packed case with ftz: the <2 x half> operands of |
| ; @llvm.nvvm.add.rn.ftz.sat.v2f16 are each loaded as a single 32-bit value |
| ; (ld.param.b32), and the call is expected to lower to exactly one |
| ; add.rn.ftz.sat.f16x2 on those 32-bit registers. |
| ; The assertions in the body are tool-generated; regenerate them rather than |
| ; editing by hand. |
| define <2 x half> @add_rn_ftz_sat_f16x2(<2 x half> %a, <2 x half> %b) { |
| ; CHECK-LABEL: add_rn_ftz_sat_f16x2( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b32 %r<4>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b32 %r1, [add_rn_ftz_sat_f16x2_param_0]; |
| ; CHECK-NEXT: ld.param.b32 %r2, [add_rn_ftz_sat_f16x2_param_1]; |
| ; CHECK-NEXT: add.rn.ftz.sat.f16x2 %r3, %r1, %r2; |
| ; CHECK-NEXT: st.param.b32 [func_retval0], %r3; |
| ; CHECK-NEXT: ret; |
| %1 = call <2 x half> @llvm.nvvm.add.rn.ftz.sat.v2f16(<2 x half> %a, <2 x half> %b) |
| ret <2 x half> %1 |
| } |