; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mcpu=sm_70 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mcpu=sm_70 | %ptxas-verify -arch=sm_70 %}
; Compile for the 64-bit NVPTX target.
target triple = "nvptx64-nvidia-cuda"
; Aggregate of two doubles; used below as both the byval argument type and the
; return type of @add.
%struct.double2 = type { double, double }
; External callee: takes two %struct.double2 values passed byval (each pointer
; annotated align 16) and returns a %struct.double2 by value.
declare %struct.double2 @add(ptr align(16) byval(%struct.double2), ptr align(16) byval(%struct.double2))
; call_byval: passes %in1 and %in2 to @add as align-16 byval %struct.double2
; arguments and stores the returned struct to %out.
;
; The autogenerated assertions in the body pin the sm_70 PTX emitted for this
; call sequence:
;   - each byval argument gets a `.param .align 16 .b8 paramN[16]` slot and is
;     copied into it with one ld.v2.b64 / st.param.v2.b64 pair;
;   - the struct return value uses `.param .align 8 .b8 retval0[16]` and is read
;     back with two scalar ld.param.b64 loads (offsets 8 and 0);
;   - the result is written through %out with two st.b64 stores (offsets 8 and 0).
define void @call_byval(ptr %out, ptr %in1, ptr %in2) {
; CHECK-LABEL: call_byval(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<12>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [call_byval_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
; CHECK-NEXT: .param .align 16 .b8 param1[16];
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
; CHECK-NEXT: ld.param.b64 %rd2, [call_byval_param_2];
; CHECK-NEXT: ld.v2.b64 {%rd3, %rd4}, [%rd2];
; CHECK-NEXT: st.param.v2.b64 [param1], {%rd3, %rd4};
; CHECK-NEXT: ld.param.b64 %rd5, [call_byval_param_1];
; CHECK-NEXT: ld.v2.b64 {%rd6, %rd7}, [%rd5];
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd6, %rd7};
; CHECK-NEXT: call.uni (retval0), add, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd8, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd9, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.b64 [%rd1+8], %rd8;
; CHECK-NEXT: st.b64 [%rd1], %rd9;
; CHECK-NEXT: ret;
; Both pointers are handed to @add byval with align 16; the aggregate result is
; then stored as a whole to %out.
%call = call %struct.double2 @add(ptr align(16) byval(%struct.double2) %in1, ptr align(16) byval(%struct.double2) %in2)
store %struct.double2 %call, ptr %out, align 16
ret void
}