; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -x86-asm-syntax=intel | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -x86-asm-syntax=intel | FileCheck %s --check-prefix=X64

; Test correct handling of musttail calls with byval struct arguments.

%struct.1xi32 = type { [1 x i32] }
%struct.3xi32 = type { [3 x i32] }
%struct.5xi32 = type { [5 x i32] }

declare dso_local i32 @Func1(ptr byval(%struct.1xi32) %0)
declare dso_local i32 @Func3(ptr byval(%struct.3xi32) %0)
declare dso_local i32 @Func5(ptr byval(%struct.5xi32) %0)
declare dso_local i32 @FuncManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)

define dso_local i32 @test1(ptr byval(%struct.1xi32) %0) {
; X32-LABEL: test1:
; X32: # %bb.0:
; X32-NEXT: jmp Func1 # TAILCALL
;
; X64-LABEL: test1:
; X64: # %bb.0:
; X64-NEXT: jmp Func1 # TAILCALL
  %r = musttail call i32 @Func1(ptr byval(%struct.1xi32) %0)
  ret i32 %r
}

define dso_local i32 @test3(ptr byval(%struct.3xi32) %0) {
; X32-LABEL: test3:
; X32: # %bb.0:
; X32-NEXT: jmp Func3 # TAILCALL
;
; X64-LABEL: test3:
; X64: # %bb.0:
; X64-NEXT: jmp Func3 # TAILCALL
  %r = musttail call i32 @Func3(ptr byval(%struct.3xi32) %0)
  ret i32 %r
}

; sizeof(%struct.5xi32) = 20 > 16 bytes, so on x86-64 this struct is passed on the stack.
define dso_local i32 @test5(ptr byval(%struct.5xi32) %0) {
; X32-LABEL: test5:
; X32: # %bb.0:
; X32-NEXT: jmp Func5 # TAILCALL
;
; X64-LABEL: test5:
; X64: # %bb.0:
; X64-NEXT: jmp Func5 # TAILCALL
  %r = musttail call i32 @Func5(ptr byval(%struct.5xi32) %0)
  ret i32 %r
}

; Test passing multiple arguments of different sizes on the stack. On x86-64
; Linux (SysV ABI) the first six integer arguments are passed in registers
; (rdi, rsi, rdx, rcx, r8, r9); the i8 and the byval struct go on the stack.
define dso_local i32 @testManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7) {
; X32-LABEL: testManyArgs:
; X32: # %bb.0:
; X32-NEXT: jmp FuncManyArgs # TAILCALL
;
; X64-LABEL: testManyArgs:
; X64: # %bb.0:
; X64-NEXT: jmp FuncManyArgs # TAILCALL
  %r = musttail call i32 @FuncManyArgs(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
  ret i32 %r
}

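; A musttail self-call: every argument is already in place for the recursive
; call, so no copying is needed and the call lowers to a direct jump on both
; targets.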
define dso_local i32 @testRecursion(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7) {
; X32-LABEL: testRecursion:
; X32: # %bb.0:
; X32-NEXT: jmp testRecursion # TAILCALL
;
; X64-LABEL: testRecursion:
; X64: # %bb.0:
; X64-NEXT: jmp testRecursion # TAILCALL
  %r = musttail call i32 @testRecursion(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i8 %6, ptr byval(%struct.5xi32) %7)
  ret i32 %r
}

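; Helper used by @swapByValArguments below: returns the sum of its two byval
; i32 arguments.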
define dso_local i32 @swap(ptr byval(%struct.1xi32) %0, ptr byval(%struct.1xi32) %1) noinline {
; X32-LABEL: swap:
; X32: # %bb.0: # %entry
; X32-NEXT: mov eax, dword ptr [esp + 4]
; X32-NEXT: add eax, dword ptr [esp + 8]
; X32-NEXT: ret
;
; X64-LABEL: swap:
; X64: # %bb.0: # %entry
; X64-NEXT: mov eax, dword ptr [rsp + 8]
; X64-NEXT: add eax, dword ptr [rsp + 16]
; X64-NEXT: ret
entry:
  %a.ptr = getelementptr inbounds %struct.1xi32, ptr %0, i32 0, i32 0, i32 0
  %a = load i32, ptr %a.ptr, align 4
  %b.ptr = getelementptr inbounds %struct.1xi32, ptr %1, i32 0, i32 0, i32 0
  %b = load i32, ptr %b.ptr, align 4
  %sum = add i32 %a, %b
  ret i32 %sum
}

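; The byval arguments are forwarded to @swap in swapped order, so their stack
; copies have to be exchanged before the tail call.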
define dso_local i32 @swapByValArguments(ptr byval(%struct.1xi32) %0, ptr byval(%struct.1xi32) %1) {
; X32-LABEL: swapByValArguments:
; X32: # %bb.0:
; X32-NEXT: sub esp, 8
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: mov eax, dword ptr [esp + 12]
; X32-NEXT: mov dword ptr [esp], eax
; X32-NEXT: mov eax, dword ptr [esp + 16]
; X32-NEXT: mov dword ptr [esp + 4], eax
; X32-NEXT: add esp, 8
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: jmp swap # TAILCALL
;
; X64-LABEL: swapByValArguments:
; X64: # %bb.0:
; X64-NEXT: mov eax, dword ptr [rsp + 8]
; X64-NEXT: mov dword ptr [rsp - 16], eax
; X64-NEXT: mov eax, dword ptr [rsp + 16]
; X64-NEXT: mov dword ptr [rsp - 8], eax
; X64-NEXT: jmp swap # TAILCALL
  %r = musttail call i32 @swap(ptr byval(%struct.1xi32) %1, ptr byval(%struct.1xi32) %0)
  ret i32 %r
}

; Clang only uses byval for arguments of 65 bytes or larger, but other
; frontends, e.g. rustc, also use byval for smaller types. A 20-byte struct
; is used here to keep the tests readable.
%twenty_bytes = type { [5 x i32] }
declare void @large_callee(ptr byval(%twenty_bytes) align 4)

; Functions with byval parameters can be tail-called, because the value is
; passed on the stack in the same way for the caller and the callee; on x86,
; byval arguments are never passed (even partially) in registers.
define void @large_caller(ptr byval(%twenty_bytes) align 4 %a) {
; X32-LABEL: large_caller:
; X32: # %bb.0: # %entry
; X32-NEXT: jmp large_callee@PLT # TAILCALL
;
; X64-LABEL: large_caller:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp large_callee@PLT # TAILCALL
entry:
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %a)
  ret void
}

; The IR here looks dodgy because an alloca is passed to a musttail call, but
; since it is passed as a byval argument its contents are copied into the
; stack space allocated by @large_caller_new_value's caller, so the call is
; valid.
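; In the X64 checks below, each movabs constant packs two i32 field values
; into one qword store: 4294967296 = 1 << 32 holds {0, 1} and
; 12884901890 = 2 + (3 << 32) holds {2, 3}; the remaining field 4 is stored
; as a separate dword.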
define void @large_caller_new_value(ptr byval(%twenty_bytes) align 4 %a) {
; X32-LABEL: large_caller_new_value:
; X32: # %bb.0: # %entry
; X32-NEXT: sub esp, 20
; X32-NEXT: .cfi_def_cfa_offset 24
; X32-NEXT: mov dword ptr [esp], 0
; X32-NEXT: mov dword ptr [esp + 4], 1
; X32-NEXT: mov dword ptr [esp + 8], 2
; X32-NEXT: mov dword ptr [esp + 12], 3
; X32-NEXT: mov dword ptr [esp + 16], 4
; X32-NEXT: mov dword ptr [esp + 24], 0
; X32-NEXT: mov dword ptr [esp + 28], 1
; X32-NEXT: mov dword ptr [esp + 32], 2
; X32-NEXT: mov dword ptr [esp + 36], 3
; X32-NEXT: mov dword ptr [esp + 40], 4
; X32-NEXT: add esp, 20
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: jmp large_callee@PLT # TAILCALL
;
; X64-LABEL: large_caller_new_value:
; X64: # %bb.0: # %entry
; X64-NEXT: movabs rax, 4294967296
; X64-NEXT: mov qword ptr [rsp - 20], rax
; X64-NEXT: movabs rcx, 12884901890
; X64-NEXT: mov qword ptr [rsp - 12], rcx
; X64-NEXT: mov dword ptr [rsp - 4], 4
; X64-NEXT: mov qword ptr [rsp + 8], rax
; X64-NEXT: mov qword ptr [rsp + 16], rcx
; X64-NEXT: mov dword ptr [rsp + 24], 4
; X64-NEXT: jmp large_callee@PLT # TAILCALL
entry:
  %y = alloca %twenty_bytes, align 4
  store i32 0, ptr %y, align 4
  %0 = getelementptr inbounds i8, ptr %y, i32 4
  store i32 1, ptr %0, align 4
  %1 = getelementptr inbounds i8, ptr %y, i32 8
  store i32 2, ptr %1, align 4
  %2 = getelementptr inbounds i8, ptr %y, i32 12
  store i32 3, ptr %2, align 4
  %3 = getelementptr inbounds i8, ptr %y, i32 16
  store i32 4, ptr %3, align 4
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 %y)
  ret void
}

declare void @two_byvals_callee(ptr byval(%twenty_bytes) align 4, ptr byval(%twenty_bytes) align 4)
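; As in @swapByValArguments, the two 20-byte byval arguments are forwarded in
; swapped order, so their copies are exchanged on the stack before the tail
; call.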
define void @swap_byvals(ptr byval(%twenty_bytes) align 4 %a, ptr byval(%twenty_bytes) align 4 %b) {
; X32-LABEL: swap_byvals:
; X32: # %bb.0: # %entry
; X32-NEXT: sub esp, 40
; X32-NEXT: .cfi_def_cfa_offset 44
; X32-NEXT: mov eax, dword ptr [esp + 60]
; X32-NEXT: mov dword ptr [esp + 16], eax
; X32-NEXT: mov eax, dword ptr [esp + 56]
; X32-NEXT: mov dword ptr [esp + 12], eax
; X32-NEXT: mov eax, dword ptr [esp + 52]
; X32-NEXT: mov dword ptr [esp + 8], eax
; X32-NEXT: mov eax, dword ptr [esp + 44]
; X32-NEXT: mov ecx, dword ptr [esp + 48]
; X32-NEXT: mov dword ptr [esp + 4], ecx
; X32-NEXT: mov dword ptr [esp], eax
; X32-NEXT: mov eax, dword ptr [esp + 80]
; X32-NEXT: mov dword ptr [esp + 36], eax
; X32-NEXT: mov eax, dword ptr [esp + 76]
; X32-NEXT: mov dword ptr [esp + 32], eax
; X32-NEXT: mov eax, dword ptr [esp + 72]
; X32-NEXT: mov dword ptr [esp + 28], eax
; X32-NEXT: mov eax, dword ptr [esp + 64]
; X32-NEXT: mov ecx, dword ptr [esp + 68]
; X32-NEXT: mov dword ptr [esp + 24], ecx
; X32-NEXT: mov dword ptr [esp + 20], eax
; X32-NEXT: add esp, 40
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: jmp two_byvals_callee@PLT # TAILCALL
;
; X64-LABEL: swap_byvals:
; X64: # %bb.0: # %entry
; X64-NEXT: mov eax, dword ptr [rsp + 24]
; X64-NEXT: mov dword ptr [rsp - 8], eax
; X64-NEXT: movaps xmm0, xmmword ptr [rsp + 8]
; X64-NEXT: movaps xmmword ptr [rsp - 24], xmm0
; X64-NEXT: mov eax, dword ptr [rsp + 48]
; X64-NEXT: mov dword ptr [rsp - 32], eax
; X64-NEXT: mov rax, qword ptr [rsp + 32]
; X64-NEXT: mov rcx, qword ptr [rsp + 40]
; X64-NEXT: mov qword ptr [rsp - 40], rcx
; X64-NEXT: mov qword ptr [rsp - 48], rax
; X64-NEXT: jmp two_byvals_callee@PLT # TAILCALL
entry:
  musttail call void @two_byvals_callee(ptr byval(%twenty_bytes) align 4 %b, ptr byval(%twenty_bytes) align 4 %a)
  ret void
}

; A forwarded byval arg, but at a different argument position. On x86-64 the
; leading i32 is passed in a register and byval arguments never are, so the
; byval arg already sits at the correct stack position and the call is
; tail-call optimized. On i686 every argument is on the stack, so the byval
; copy has to be moved and a regular call is emitted.
declare void @shift_byval_callee(ptr byval(%twenty_bytes) align 4)
define void @shift_byval(i32 %a, ptr byval(%twenty_bytes) align 4 %b) {
; X32-LABEL: shift_byval:
; X32: # %bb.0: # %entry
; X32-NEXT: push edi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: push esi
; X32-NEXT: .cfi_def_cfa_offset 12
; X32-NEXT: .cfi_offset esi, -12
; X32-NEXT: .cfi_offset edi, -8
; X32-NEXT: mov eax, dword ptr [esp + 32]
; X32-NEXT: mov ecx, dword ptr [esp + 28]
; X32-NEXT: mov edx, dword ptr [esp + 24]
; X32-NEXT: mov esi, dword ptr [esp + 16]
; X32-NEXT: mov edi, dword ptr [esp + 20]
; X32-NEXT: push eax
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: push ecx
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: push edx
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: push edi
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: push esi
; X32-NEXT: .cfi_adjust_cfa_offset 4
; X32-NEXT: call shift_byval_callee@PLT
; X32-NEXT: add esp, 20
; X32-NEXT: .cfi_adjust_cfa_offset -20
; X32-NEXT: pop esi
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: pop edi
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: ret
;
; X64-LABEL: shift_byval:
; X64: # %bb.0: # %entry
; X64-NEXT: jmp shift_byval_callee@PLT # TAILCALL
entry:
  tail call void @shift_byval_callee(ptr byval(%twenty_bytes) align 4 %b)
  ret void
}

; A global object passed as a byval argument: it must be copied into the
; argument area, but no extra stack temporary is needed.
@large_global = external global %twenty_bytes
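; On x86-64 the address of the external global is loaded through the GOT
; (large_global@GOTPCREL) before its fields are copied into the argument area.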
define void @large_caller_from_global(ptr byval(%twenty_bytes) align 4 %a) {
; X32-LABEL: large_caller_from_global:
; X32: # %bb.0: # %entry
; X32-NEXT: mov eax, dword ptr [large_global+16]
; X32-NEXT: mov dword ptr [esp + 20], eax
; X32-NEXT: mov eax, dword ptr [large_global+12]
; X32-NEXT: mov dword ptr [esp + 16], eax
; X32-NEXT: mov eax, dword ptr [large_global+8]
; X32-NEXT: mov dword ptr [esp + 12], eax
; X32-NEXT: mov eax, dword ptr [large_global+4]
; X32-NEXT: mov dword ptr [esp + 8], eax
; X32-NEXT: mov eax, dword ptr [large_global]
; X32-NEXT: mov dword ptr [esp + 4], eax
; X32-NEXT: jmp large_callee@PLT # TAILCALL
;
; X64-LABEL: large_caller_from_global:
; X64: # %bb.0: # %entry
; X64-NEXT: mov rax, qword ptr [rip + large_global@GOTPCREL]
; X64-NEXT: mov ecx, dword ptr [rax + 16]
; X64-NEXT: mov dword ptr [rsp + 24], ecx
; X64-NEXT: mov rcx, qword ptr [rax]
; X64-NEXT: mov rax, qword ptr [rax + 8]
; X64-NEXT: mov qword ptr [rsp + 16], rax
; X64-NEXT: mov qword ptr [rsp + 8], rcx
; X64-NEXT: jmp large_callee@PLT # TAILCALL
entry:
  musttail call void @large_callee(ptr byval(%twenty_bytes) align 4 @large_global)
  ret void
}