| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| |
| ; ## FP16 abs is not supported by PTX version (PTX < 65). |
| ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx60 \ |
| ; RUN: -O0 -disable-post-ra -verify-machineinstrs \ |
| ; RUN: | FileCheck -check-prefix CHECK-NOF16 %s |
| ; RUN: %if ptxas %{ \ |
| ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx60 \ |
| ; RUN: -O0 -disable-post-ra -verify-machineinstrs \ |
| ; RUN: | %ptxas-verify -arch=sm_53 \ |
| ; RUN: %} |
| |
| ; ## FP16 support explicitly disabled (--nvptx-no-f16-math). |
| ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 --nvptx-no-f16-math \ |
| ; RUN: -O0 -disable-post-ra -verify-machineinstrs \ |
| ; RUN: | FileCheck -check-prefix CHECK-NOF16 %s |
| ; RUN: %if ptxas %{ \ |
| ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 --nvptx-no-f16-math \ |
| ; RUN: -O0 -disable-post-ra -verify-machineinstrs \ |
| ; RUN: | %ptxas-verify -arch=sm_53 \ |
| ; RUN: %} |
| |
| ; ## FP16 is not supported by hardware (SM < 53). |
| ; RUN: llc < %s -mcpu=sm_52 -mattr=+ptx65 \ |
| ; RUN: -O0 -disable-post-ra -verify-machineinstrs \ |
| ; RUN: | FileCheck -check-prefix CHECK-NOF16 %s |
| ; RUN: %if ptxas %{ \ |
| ; RUN: llc < %s -mcpu=sm_52 -mattr=+ptx65 \ |
| ; RUN: -O0 -disable-post-ra -verify-machineinstrs \ |
| ; RUN: | %ptxas-verify -arch=sm_52 \ |
| ; RUN: %} |
| |
| ; ## Full FP16 abs support. |
| ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 \ |
| ; RUN: -O0 -disable-post-ra -verify-machineinstrs \ |
| ; RUN: | FileCheck -check-prefix CHECK-F16-ABS %s |
| ; RUN: %if ptxas %{ \ |
| ; RUN: llc < %s -mcpu=sm_53 -mattr=+ptx65 \ |
| ; RUN: -O0 -disable-post-ra -verify-machineinstrs \ |
| ; RUN: | %ptxas-verify -arch=sm_53 \ |
| ; RUN: %} |
| |
| ; No RUN line passes a triple, so the module selects the target here. |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; Scalar and <2 x half> fabs intrinsics exercised by the tests below. |
| declare half @llvm.fabs.f16(half) |
| declare <2 x half> @llvm.fabs.v2f16(<2 x half>) |
| |
| ; Scalar half fabs. In the NOF16 configurations the expected PTX widens the |
| ; value to f32, applies abs.f32, and rounds back to f16; in the F16-ABS |
| ; configuration a single abs.f16 is expected. |
| define half @test_fabs(half %a) { |
| ; CHECK-NOF16-LABEL: test_fabs( |
| ; CHECK-NOF16: { |
| ; CHECK-NOF16-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-NOF16-NEXT: .reg .b32 %f<3>; |
| ; CHECK-NOF16-EMPTY: |
| ; CHECK-NOF16-NEXT: // %bb.0: |
| ; CHECK-NOF16-NEXT: ld.param.b16 %rs1, [test_fabs_param_0]; |
| ; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs1; |
| ; CHECK-NOF16-NEXT: abs.f32 %f2, %f1; |
| ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs2, %f2; |
| ; CHECK-NOF16-NEXT: st.param.b16 [func_retval0], %rs2; |
| ; CHECK-NOF16-NEXT: ret; |
| ; |
| ; CHECK-F16-ABS-LABEL: test_fabs( |
| ; CHECK-F16-ABS: { |
| ; CHECK-F16-ABS-NEXT: .reg .b16 %rs<3>; |
| ; CHECK-F16-ABS-EMPTY: |
| ; CHECK-F16-ABS-NEXT: // %bb.0: |
| ; CHECK-F16-ABS-NEXT: ld.param.b16 %rs1, [test_fabs_param_0]; |
| ; CHECK-F16-ABS-NEXT: abs.f16 %rs2, %rs1; |
| ; CHECK-F16-ABS-NEXT: st.param.b16 [func_retval0], %rs2; |
| ; CHECK-F16-ABS-NEXT: ret; |
| %abs = call half @llvm.fabs.f16(half %a) |
| ret half %abs |
| } |
| |
| ; Packed <2 x half> fabs. Only the F16-ABS configuration is asserted here: |
| ; a single abs.f16x2 on the 32-bit packed register is expected. |
| ; |
| ; A block of assertions under the plain F16 prefix used to sit here; no RUN |
| ; line enables that prefix, so FileCheck never evaluated it and it has been |
| ; removed rather than left as misleading dead coverage. |
| ; |
| ; The NOF16 configurations carry no assertions for this function, presumably |
| ; because their outputs are not identical across those RUN lines -- TODO |
| ; confirm by regenerating with utils/update_llc_test_checks.py. |
| ; |
| ; NOTE(review): attribute group #0 has no definition in the visible part of |
| ; this file -- confirm whether it is intentional or can be dropped. |
| define <2 x half> @test_fabs_2(<2 x half> %a) #0 { |
| ; CHECK-F16-ABS-LABEL: test_fabs_2( |
| ; CHECK-F16-ABS: { |
| ; CHECK-F16-ABS-NEXT: .reg .b32 %r<3>; |
| ; CHECK-F16-ABS-EMPTY: |
| ; CHECK-F16-ABS-NEXT: // %bb.0: |
| ; CHECK-F16-ABS-NEXT: ld.param.b32 %r1, [test_fabs_2_param_0]; |
| ; CHECK-F16-ABS-NEXT: abs.f16x2 %r2, %r1; |
| ; CHECK-F16-ABS-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK-F16-ABS-NEXT: ret; |
| %r = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a) |
| ret <2 x half> %r |
| } |
| |