| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| ; RUN: llc < %s -mcpu=sm_70 | FileCheck %s |
| ; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %} |
| |
| ; Select the 64-bit NVPTX CUDA target for everything in this module. |
| target triple = "nvptx64-nvidia-cuda" |
| |
| ; Aggregate of two doubles; used below as both the byval argument type and the return type of @add. |
| %struct.double2 = type { double, double } |
| |
| ; Declaration only (no body in this file): takes two %struct.double2 arguments through |
| ; byval pointers, each marked align(16), and returns a %struct.double2 by value. |
| declare %struct.double2 @add(ptr align(16) byval(%struct.double2), ptr align(16) byval(%struct.double2)) |
| |
| ; Calls @add with %in1 and %in2 passed as byval(%struct.double2) align(16) arguments and |
| ; stores the returned %struct.double2 to %out. |
| ; The expected PTX below copies each 16-byte byval argument into its .param slot with one |
| ; v2.b64 load followed by one st.param.v2.b64, whereas the return value (retval0 is declared |
| ; with .align 8) is read back and written to %out as two separate b64 accesses. |
| define void @call_byval(ptr %out, ptr %in1, ptr %in2) { |
| ; CHECK-LABEL: call_byval( |
| ; CHECK: { |
| ; CHECK-NEXT: .reg .b64 %rd<12>; |
| ; CHECK-EMPTY: |
| ; CHECK-NEXT: // %bb.0: |
| ; CHECK-NEXT: ld.param.b64 %rd1, [call_byval_param_0]; |
| ; CHECK-NEXT: { // callseq 0, 0 |
| ; CHECK-NEXT: .param .align 16 .b8 param0[16]; |
| ; CHECK-NEXT: .param .align 16 .b8 param1[16]; |
| ; CHECK-NEXT: .param .align 8 .b8 retval0[16]; |
| ; CHECK-NEXT: ld.param.b64 %rd2, [call_byval_param_2]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd2]; |
| ; CHECK-NEXT: st.param.v2.b64 [param1], {%rd3, %rd4}; |
| ; CHECK-NEXT: ld.param.b64 %rd5, [call_byval_param_1]; |
| ; CHECK-NEXT: ld.v2.b64 {%rd6, %rd7}, [%rd5]; |
| ; CHECK-NEXT: st.param.v2.b64 [param0], {%rd6, %rd7}; |
| ; CHECK-NEXT: call.uni (retval0), add, (param0, param1); |
| ; CHECK-NEXT: ld.param.b64 %rd8, [retval0+8]; |
| ; CHECK-NEXT: ld.param.b64 %rd9, [retval0]; |
| ; CHECK-NEXT: } // callseq 0 |
| ; CHECK-NEXT: st.b64 [%rd1+8], %rd8; |
| ; CHECK-NEXT: st.b64 [%rd1], %rd9; |
| ; CHECK-NEXT: ret; |
| ; Both call-site arguments repeat the byval + align(16) attributes from the declaration of @add. |
| %call = call %struct.double2 @add(ptr align(16) byval(%struct.double2) %in1, ptr align(16) byval(%struct.double2) %in2) |
| ; The whole aggregate result is stored to %out with a stated alignment of 16. |
| store %struct.double2 %call, ptr %out, align 16 |
| ret void |
| } |