| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: llc < %s -O0 -mtriple=nvptx-- -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefix=CHECK32 |
| ; RUN: llc < %s -O0 -mtriple=nvptx64-- -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefix=CHECK64 |
| ; RUN: %if ptxas-isa-6.0 && ptxas-ptr32 %{ llc < %s -O0 -mtriple=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %} |
| ; RUN: %if ptxas-isa-6.0 %{ llc < %s -O0 -mtriple=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %} |
| |
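| ; NOTE: neither RUN line above enables the plain CHECK prefix (each passes only its own 32-bit or 64-bit prefix), so FileCheck never evaluates the following directive. |
| ; CHECK: .address_size [[BITS:32|64]] |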
| |
| ; Element type of the va_list objects (%al and %al2) that @foo allocates on its stack. |
| %struct.__va_list_tag = type { ptr, ptr, i32, i32 } |
| |
| ; Global in addrspace(1) initialized with the address of @foo; @test_foo loads it to obtain the target of its indirect call. |
| @foo_ptr = internal addrspace(1) global ptr @foo, align 8 |
| |
| define i32 @foo(i32 %a, ...) { |
| ; CHECK32-LABEL: foo( |
| ; CHECK32: { |
| ; CHECK32-NEXT: .local .align 8 .b8 __local_depot0[32]; |
| ; CHECK32-NEXT: .reg .b32 %SP; |
| ; CHECK32-NEXT: .reg .b32 %SPL; |
| ; CHECK32-NEXT: .reg .b32 %r<29>; |
| ; CHECK32-NEXT: .reg .b64 %rd<3>; |
| ; CHECK32-EMPTY: |
| ; CHECK32-NEXT: // %bb.0: // %entry |
| ; CHECK32-NEXT: mov.b32 %SPL, __local_depot0; |
| ; CHECK32-NEXT: cvta.local.u32 %SP, %SPL; |
| ; CHECK32-NEXT: ld.param.b32 %r2, [foo_param_1]; |
| ; CHECK32-NEXT: ld.param.b32 %r1, [foo_param_0]; |
| ; CHECK32-NEXT: st.b32 [%SP], %r2; |
| ; CHECK32-NEXT: ld.b32 %r3, [%SP]; |
| ; CHECK32-NEXT: st.b32 [%SP+16], %r3; |
| ; CHECK32-NEXT: ld.b32 %r4, [%SP]; |
| ; CHECK32-NEXT: add.s32 %r5, %r4, 3; |
| ; CHECK32-NEXT: and.b32 %r6, %r5, -4; |
| ; CHECK32-NEXT: add.s32 %r7, %r6, 4; |
| ; CHECK32-NEXT: st.b32 [%SP], %r7; |
| ; CHECK32-NEXT: ld.b32 %r8, [%r6]; |
| ; CHECK32-NEXT: ld.b32 %r9, [%SP]; |
| ; CHECK32-NEXT: add.s32 %r10, %r9, 7; |
| ; CHECK32-NEXT: and.b32 %r11, %r10, -8; |
| ; CHECK32-NEXT: add.s32 %r12, %r11, 8; |
| ; CHECK32-NEXT: st.b32 [%SP], %r12; |
| ; CHECK32-NEXT: ld.b64 %rd1, [%r11]; |
| ; CHECK32-NEXT: ld.b32 %r13, [%SP]; |
| ; CHECK32-NEXT: add.s32 %r14, %r13, 7; |
| ; CHECK32-NEXT: and.b32 %r15, %r14, -8; |
| ; CHECK32-NEXT: add.s32 %r16, %r15, 8; |
| ; CHECK32-NEXT: st.b32 [%SP], %r16; |
| ; CHECK32-NEXT: ld.b64 %rd2, [%r15]; |
| ; CHECK32-NEXT: ld.b32 %r17, [%SP]; |
| ; CHECK32-NEXT: add.s32 %r18, %r17, 3; |
| ; CHECK32-NEXT: and.b32 %r19, %r18, -4; |
| ; CHECK32-NEXT: add.s32 %r20, %r19, 4; |
| ; CHECK32-NEXT: st.b32 [%SP], %r20; |
| ; CHECK32-NEXT: ld.b32 %r21, [%r19]; |
| ; CHECK32-NEXT: { // callseq 0, 0 |
| ; CHECK32-NEXT: .param .b32 param0; |
| ; CHECK32-NEXT: .param .b32 param1; |
| ; CHECK32-NEXT: .param .b64 param2; |
| ; CHECK32-NEXT: .param .b64 param3; |
| ; CHECK32-NEXT: .param .b32 param4; |
| ; CHECK32-NEXT: .param .b32 retval0; |
| ; CHECK32-NEXT: st.param.b32 [param4], %r21; |
| ; CHECK32-NEXT: st.param.b64 [param3], %rd2; |
| ; CHECK32-NEXT: st.param.b64 [param2], %rd1; |
| ; CHECK32-NEXT: st.param.b32 [param1], %r8; |
| ; CHECK32-NEXT: st.param.b32 [param0], %r1; |
| ; CHECK32-NEXT: call.uni (retval0), bar, (param0, param1, param2, param3, param4); |
| ; CHECK32-NEXT: ld.param.b32 %r22, [retval0]; |
| ; CHECK32-NEXT: } // callseq 0 |
| ; CHECK32-NEXT: ld.b32 %r23, [%SP+16]; |
| ; CHECK32-NEXT: add.s32 %r24, %r23, 3; |
| ; CHECK32-NEXT: and.b32 %r25, %r24, -4; |
| ; CHECK32-NEXT: add.s32 %r26, %r25, 4; |
| ; CHECK32-NEXT: st.b32 [%SP+16], %r26; |
| ; CHECK32-NEXT: ld.b32 %r27, [%r25]; |
| ; CHECK32-NEXT: add.s32 %r28, %r22, %r27; |
| ; CHECK32-NEXT: st.param.b32 [func_retval0], %r28; |
| ; CHECK32-NEXT: ret; |
| ; |
| ; CHECK64-LABEL: foo( |
| ; CHECK64: { |
| ; CHECK64-NEXT: .local .align 8 .b8 __local_depot0[48]; |
| ; CHECK64-NEXT: .reg .b64 %SP; |
| ; CHECK64-NEXT: .reg .b64 %SPL; |
| ; CHECK64-NEXT: .reg .b32 %r<6>; |
| ; CHECK64-NEXT: .reg .b64 %rd<26>; |
| ; CHECK64-EMPTY: |
| ; CHECK64-NEXT: // %bb.0: // %entry |
| ; CHECK64-NEXT: mov.b64 %SPL, __local_depot0; |
| ; CHECK64-NEXT: cvta.local.u64 %SP, %SPL; |
| ; CHECK64-NEXT: ld.param.b64 %rd1, [foo_param_1]; |
| ; CHECK64-NEXT: ld.param.b32 %r1, [foo_param_0]; |
| ; CHECK64-NEXT: st.b64 [%SP], %rd1; |
| ; CHECK64-NEXT: ld.b64 %rd2, [%SP]; |
| ; CHECK64-NEXT: st.b64 [%SP+24], %rd2; |
| ; CHECK64-NEXT: ld.b64 %rd3, [%SP]; |
| ; CHECK64-NEXT: add.s64 %rd4, %rd3, 3; |
| ; CHECK64-NEXT: and.b64 %rd5, %rd4, -4; |
| ; CHECK64-NEXT: add.s64 %rd6, %rd5, 4; |
| ; CHECK64-NEXT: st.b64 [%SP], %rd6; |
| ; CHECK64-NEXT: ld.b32 %r2, [%rd5]; |
| ; CHECK64-NEXT: ld.b64 %rd7, [%SP]; |
| ; CHECK64-NEXT: add.s64 %rd8, %rd7, 7; |
| ; CHECK64-NEXT: and.b64 %rd9, %rd8, -8; |
| ; CHECK64-NEXT: add.s64 %rd10, %rd9, 8; |
| ; CHECK64-NEXT: st.b64 [%SP], %rd10; |
| ; CHECK64-NEXT: ld.b64 %rd11, [%rd9]; |
| ; CHECK64-NEXT: ld.b64 %rd12, [%SP]; |
| ; CHECK64-NEXT: add.s64 %rd13, %rd12, 7; |
| ; CHECK64-NEXT: and.b64 %rd14, %rd13, -8; |
| ; CHECK64-NEXT: add.s64 %rd15, %rd14, 8; |
| ; CHECK64-NEXT: st.b64 [%SP], %rd15; |
| ; CHECK64-NEXT: ld.b64 %rd16, [%rd14]; |
| ; CHECK64-NEXT: ld.b64 %rd17, [%SP]; |
| ; CHECK64-NEXT: add.s64 %rd18, %rd17, 7; |
| ; CHECK64-NEXT: and.b64 %rd19, %rd18, -8; |
| ; CHECK64-NEXT: add.s64 %rd20, %rd19, 8; |
| ; CHECK64-NEXT: st.b64 [%SP], %rd20; |
| ; CHECK64-NEXT: ld.b64 %rd21, [%rd19]; |
| ; CHECK64-NEXT: { // callseq 0, 0 |
| ; CHECK64-NEXT: .param .b32 param0; |
| ; CHECK64-NEXT: .param .b32 param1; |
| ; CHECK64-NEXT: .param .b64 param2; |
| ; CHECK64-NEXT: .param .b64 param3; |
| ; CHECK64-NEXT: .param .b64 param4; |
| ; CHECK64-NEXT: .param .b32 retval0; |
| ; CHECK64-NEXT: st.param.b64 [param4], %rd21; |
| ; CHECK64-NEXT: st.param.b64 [param3], %rd16; |
| ; CHECK64-NEXT: st.param.b64 [param2], %rd11; |
| ; CHECK64-NEXT: st.param.b32 [param1], %r2; |
| ; CHECK64-NEXT: st.param.b32 [param0], %r1; |
| ; CHECK64-NEXT: call.uni (retval0), bar, (param0, param1, param2, param3, param4); |
| ; CHECK64-NEXT: ld.param.b32 %r3, [retval0]; |
| ; CHECK64-NEXT: } // callseq 0 |
| ; CHECK64-NEXT: ld.b64 %rd22, [%SP+24]; |
| ; CHECK64-NEXT: add.s64 %rd23, %rd22, 3; |
| ; CHECK64-NEXT: and.b64 %rd24, %rd23, -4; |
| ; CHECK64-NEXT: add.s64 %rd25, %rd24, 4; |
| ; CHECK64-NEXT: st.b64 [%SP+24], %rd25; |
| ; CHECK64-NEXT: ld.b32 %r4, [%rd24]; |
| ; CHECK64-NEXT: add.s32 %r5, %r3, %r4; |
| ; CHECK64-NEXT: st.param.b32 [func_retval0], %r5; |
| ; CHECK64-NEXT: ret; |
| entry: |
| %al = alloca [1 x %struct.__va_list_tag], align 8 |
| %al2 = alloca [1 x %struct.__va_list_tag], align 8 |
| |
| ; Test va_start: @foo is the variadic callee under test; the checks above expect the value of foo_param_1 to be stored at [%SP] as the initial va_list cursor for %al |
| |
| call void @llvm.va_start(ptr %al) |
| |
| ; Test va_copy(): %al is duplicated into %al2 before any argument is consumed; %al2 is read once more after the call to @bar |
| |
| call void @llvm.va_copy(ptr %al2, ptr %al) |
| |
| ; Test va_arg(ap, int32_t): the checks expect the cursor to be rounded up to a multiple of 4 (add 3, and -4) and then advanced by 4 |
| |
| %0 = va_arg ptr %al, i32 |
| |
| ; Test va_arg(ap, int64_t): the checks expect the cursor to be rounded up to a multiple of 8 (add 7, and -8) and then advanced by 8 |
| |
| %1 = va_arg ptr %al, i64 |
| |
| ; Test va_arg(ap, double): the checks expect the same 8-byte round-up and advance as for int64_t, with the value read by an ld.b64 |
| |
| %2 = va_arg ptr %al, double |
| |
| ; Test va_arg(ap, ptr): the 32-bit checks expect a 4-byte slot and the 64-bit checks an 8-byte slot; all four values are then forwarded to @bar together with %a |
| |
| %3 = va_arg ptr %al, ptr |
| %call = call i32 @bar(i32 %a, i32 %0, i64 %1, double %2, ptr %3) |
| |
| call void @llvm.va_end(ptr %al) |
| %4 = va_arg ptr %al2, i32 |
| call void @llvm.va_end(ptr %al2) |
| %5 = add i32 %call, %4 |
| ret i32 %5 |
| } |
| |
| define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) { |
| ; Test indirect variadic function call: the callee pointer is loaded from @foo_ptr and invoked with one fixed argument (i32 4) followed by %i, %l, %d and %p as variadic arguments. |
| ; The checks below expect the four incoming parameters to be loaded into registers before the callee pointer is read. |
| ; CHECK32-LABEL: test_foo( |
| ; CHECK32: { |
| ; CHECK32-NEXT: .local .align 8 .b8 __local_depot1[32]; |
| ; CHECK32-NEXT: .reg .b32 %SP; |
| ; CHECK32-NEXT: .reg .b32 %SPL; |
| ; CHECK32-NEXT: .reg .b32 %r<8>; |
| ; CHECK32-NEXT: .reg .b64 %rd<3>; |
| ; CHECK32-EMPTY: |
| ; CHECK32-NEXT: // %bb.0: // %entry |
| ; CHECK32-NEXT: mov.b32 %SPL, __local_depot1; |
| ; CHECK32-NEXT: cvta.local.u32 %SP, %SPL; |
| ; CHECK32-NEXT: ld.param.b32 %r2, [test_foo_param_3]; |
| ; CHECK32-NEXT: ld.param.b64 %rd2, [test_foo_param_2]; |
| ; CHECK32-NEXT: ld.param.b64 %rd1, [test_foo_param_1]; |
| ; CHECK32-NEXT: ld.param.b32 %r1, [test_foo_param_0]; |
| ; CHECK32-NEXT: mov.b32 %r3, foo_ptr; |
| ; CHECK32-NEXT: cvta.global.u32 %r4, %r3; |
| ; CHECK32-NEXT: ld.b32 %r5, [%r4]; |
| ; CHECK32-NEXT: st.b32 [%SP], %r1; |
| ; CHECK32-NEXT: st.b64 [%SP+8], %rd1; |
| ; CHECK32-NEXT: st.b64 [%SP+16], %rd2; |
| ; CHECK32-NEXT: st.b32 [%SP+24], %r2; |
| ; CHECK32-NEXT: { // callseq 1, 0 |
| ; CHECK32-NEXT: .param .b32 param0; |
| ; CHECK32-NEXT: .param .b32 param1; |
| ; CHECK32-NEXT: .param .b32 retval0; |
| ; CHECK32-NEXT: add.u32 %r6, %SP, 0; |
| ; CHECK32-NEXT: st.param.b32 [param1], %r6; |
| ; CHECK32-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b32 _); |
| ; CHECK32-NEXT: st.param.b32 [param0], 4; |
| ; CHECK32-NEXT: call (retval0), %r5, (param0, param1), prototype_1; |
| ; CHECK32-NEXT: ld.param.b32 %r7, [retval0]; |
| ; CHECK32-NEXT: } // callseq 1 |
| ; CHECK32-NEXT: st.param.b32 [func_retval0], %r7; |
| ; CHECK32-NEXT: ret; |
| ; |
| ; CHECK64-LABEL: test_foo( |
| ; CHECK64: { |
| ; CHECK64-NEXT: .local .align 8 .b8 __local_depot1[32]; |
| ; CHECK64-NEXT: .reg .b64 %SP; |
| ; CHECK64-NEXT: .reg .b64 %SPL; |
| ; CHECK64-NEXT: .reg .b32 %r<3>; |
| ; CHECK64-NEXT: .reg .b64 %rd<8>; |
| ; CHECK64-EMPTY: |
| ; CHECK64-NEXT: // %bb.0: // %entry |
| ; CHECK64-NEXT: mov.b64 %SPL, __local_depot1; |
| ; CHECK64-NEXT: cvta.local.u64 %SP, %SPL; |
| ; CHECK64-NEXT: ld.param.b64 %rd3, [test_foo_param_3]; |
| ; CHECK64-NEXT: ld.param.b64 %rd2, [test_foo_param_2]; |
| ; CHECK64-NEXT: ld.param.b64 %rd1, [test_foo_param_1]; |
| ; CHECK64-NEXT: ld.param.b32 %r1, [test_foo_param_0]; |
| ; CHECK64-NEXT: mov.b64 %rd4, foo_ptr; |
| ; CHECK64-NEXT: cvta.global.u64 %rd5, %rd4; |
| ; CHECK64-NEXT: ld.b64 %rd6, [%rd5]; |
| ; CHECK64-NEXT: st.b32 [%SP], %r1; |
| ; CHECK64-NEXT: st.b64 [%SP+8], %rd1; |
| ; CHECK64-NEXT: st.b64 [%SP+16], %rd2; |
| ; CHECK64-NEXT: st.b64 [%SP+24], %rd3; |
| ; CHECK64-NEXT: { // callseq 1, 0 |
| ; CHECK64-NEXT: .param .b32 param0; |
| ; CHECK64-NEXT: .param .b64 param1; |
| ; CHECK64-NEXT: .param .b32 retval0; |
| ; CHECK64-NEXT: add.u64 %rd7, %SP, 0; |
| ; CHECK64-NEXT: st.param.b64 [param1], %rd7; |
| ; CHECK64-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .b64 _); |
| ; CHECK64-NEXT: st.param.b32 [param0], 4; |
| ; CHECK64-NEXT: call (retval0), %rd6, (param0, param1), prototype_1; |
| ; CHECK64-NEXT: ld.param.b32 %r2, [retval0]; |
| ; CHECK64-NEXT: } // callseq 1 |
| ; CHECK64-NEXT: st.param.b32 [func_retval0], %r2; |
| ; CHECK64-NEXT: ret; |
| |
| ; The checks above expect the variadic arguments to be stored to a stack buffer at [%SP], [%SP+8], [%SP+16] and [%SP+24]; the buffer address is passed as param1 and the fixed argument 4 as param0. |
| |
| entry: |
| %ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8 |
| %call = call i32 (i32, ...) %ptr(i32 4, i32 %i, i64 %l, double %d, ptr %p) |
| ret i32 %call |
| } |
| |
| declare void @llvm.va_start(ptr) |
| declare void @llvm.va_end(ptr) |
| declare void @llvm.va_copy(ptr, ptr) |
| declare i32 @bar(i32, i32, i64, double, ptr) |