| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mcpu=sm_100 | FileCheck %s --check-prefix=F32X2 |
| ; RUN: llc < %s -mcpu=sm_90 | FileCheck %s --check-prefix=NOF32X2 |
| ; RUN: llc < %s -mcpu=sm_100 -nvptx-no-f32x2 | FileCheck %s --check-prefix=NOF32X2 |
| |
| target triple = "nvptx64-nvidia-cuda" |
| |
; Element-wise fadd of two <2 x float> arguments; the sum is returned directly.
; The assertions below cover the two lowerings selected by the run lines at the
; top of this file:
;   * F32X2 prefix (-mcpu=sm_100): both parameters are loaded as one 64-bit
;     value each and combined by a single add.rn.f32x2 on .b64 registers.
;   * NOF32X2 prefix (-mcpu=sm_90, or sm_100 with -nvptx-no-f32x2): the
;     parameters are loaded as v2.b32 pairs and added lane by lane with two
;     scalar add.rn.f32 instructions on .b32 registers.
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
| define <2 x float> @test(<2 x float> %a, <2 x float> %b) { |
| ; F32X2-LABEL: test( |
| ; F32X2: { |
| ; F32X2-NEXT: .reg .b64 %rd<4>; |
| ; F32X2-EMPTY: |
| ; F32X2-NEXT: // %bb.0: |
| ; F32X2-NEXT: ld.param.b64 %rd1, [test_param_0]; |
| ; F32X2-NEXT: ld.param.b64 %rd2, [test_param_1]; |
| ; F32X2-NEXT: add.rn.f32x2 %rd3, %rd1, %rd2; |
| ; F32X2-NEXT: st.param.b64 [func_retval0], %rd3; |
| ; F32X2-NEXT: ret; |
| ; |
| ; NOF32X2-LABEL: test( |
| ; NOF32X2: { |
| ; NOF32X2-NEXT: .reg .b32 %r<7>; |
| ; NOF32X2-EMPTY: |
| ; NOF32X2-NEXT: // %bb.0: |
| ; NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_param_0]; |
| ; NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_param_1]; |
| ; NOF32X2-NEXT: add.rn.f32 %r5, %r2, %r4; |
| ; NOF32X2-NEXT: add.rn.f32 %r6, %r1, %r3; |
| ; NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; |
| ; NOF32X2-NEXT: ret; |
| %c = fadd <2 x float> %a, %b |
| ret <2 x float> %c |
| } |