llvm/test/CodeGen/X86/arg-copy-elide.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=i686-windows < %s | FileCheck %s

 declare void @addrof_i1(i1*)
 declare void @addrof_i32(i32*)
 declare void @addrof_i64(i64*)
 declare void @addrof_i128(i128*)
 declare void @addrof_i32_x3(i32*, i32*, i32*)

 define void @simple(i32 %x) {
 ; CHECK-LABEL: simple:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    retl
 entry:
   %x.addr = alloca i32
   store i32 %x, i32* %x.addr
   call void @addrof_i32(i32* %x.addr)
   ret void
 }

 ; We need to load %x before calling addrof_i32 now because it could mutate %x in
 ; place.

 define i32 @use_arg(i32 %x) {
 ; CHECK-LABEL: use_arg:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    retl
 entry:
   %x.addr = alloca i32
   store i32 %x, i32* %x.addr
   call void @addrof_i32(i32* %x.addr)
   ret i32 %x
 }

 ; We won't copy elide for types needing legalization such as i64 or i1.

 define i64 @split_i64(i64 %x) {
 ; CHECK-LABEL: split_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i64
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    movl %edi, %edx
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    retl
 entry:
   %x.addr = alloca i64, align 4
   store i64 %x, i64* %x.addr, align 4
   call void @addrof_i64(i64* %x.addr)
   ret i64 %x
 }

 define i1 @i1_arg(i1 %x) {
 ; CHECK-LABEL: i1_arg:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; CHECK-NEXT:    movl %ebx, %eax
 ; CHECK-NEXT:    andb $1, %al
 ; CHECK-NEXT:    movb %al, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i1
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl %ebx, %eax
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    popl %ebx
 ; CHECK-NEXT:    retl
   %x.addr = alloca i1
   store i1 %x, i1* %x.addr
   call void @addrof_i1(i1* %x.addr)
   ret i1 %x
 }

 ; We can't copy elide when an i64 is split between registers and memory in a
 ; fastcc function.

 define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
 ; CHECK-LABEL: fastcc_split_i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    subl $8, %esp
 ; CHECK-NEXT:    movl %edx, %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %edx, (%esp)
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i64
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    movl %edi, %edx
 ; CHECK-NEXT:    addl $8, %esp
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    retl
 entry:
   %x.addr = alloca i64, align 4
   store i64 %x, i64* %x.addr, align 4
   call void @addrof_i64(i64* %x.addr)
   ret i64 %x
 }

 ; We can't copy elide when it would reduce the user requested alignment.

 define void @high_alignment(i32 %x) {
 ; CHECK-LABEL: high_alignment:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    andl $-128, %esp
 ; CHECK-NEXT:    subl $128, %esp
 ; CHECK-NEXT:    movl 8(%ebp), %eax
 ; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
 entry:
   %x.p = alloca i32, align 128
   store i32 %x, i32* %x.p
   call void @addrof_i32(i32* %x.p)
   ret void
 }

 ; We can't copy elide when it would reduce the ABI required alignment.
 ; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
 ; doesn't guarantee it.

 define void @abi_alignment(i64 %x) {
 ; CHECK-LABEL: abi_alignment:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    andl $-8, %esp
 ; CHECK-NEXT:    subl $8, %esp
 ; CHECK-NEXT:    movl 8(%ebp), %eax
 ; CHECK-NEXT:    movl 12(%ebp), %ecx
 ; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i64
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
 entry:
   %x.p = alloca i64
   store i64 %x, i64* %x.p
   call void @addrof_i64(i64* %x.p)
   ret void
 }

 ; The code we generate for this is unimportant. This is mostly a crash test.

 define void @split_i128(i128* %sret, i128 %x) {
 ; CHECK-LABEL: split_i128:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    andl $-8, %esp
 ; CHECK-NEXT:    subl $32, %esp
 ; CHECK-NEXT:    movl 12(%ebp), %eax
 ; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; CHECK-NEXT:    movl 16(%ebp), %ebx
 ; CHECK-NEXT:    movl 20(%ebp), %esi
 ; CHECK-NEXT:    movl 24(%ebp), %edi
 ; CHECK-NEXT:    movl %edi, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i128
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl 8(%ebp), %eax
 ; CHECK-NEXT:    movl %edi, 12(%eax)
 ; CHECK-NEXT:    movl %esi, 8(%eax)
 ; CHECK-NEXT:    movl %ebx, 4(%eax)
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; CHECK-NEXT:    movl %ecx, (%eax)
 ; CHECK-NEXT:    leal -12(%ebp), %esp
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    popl %ebx
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
 entry:
   %x.addr = alloca i128
   store i128 %x, i128* %x.addr
   call void @addrof_i128(i128* %x.addr)
   store i128 %x, i128* %sret
   ret void
 }

 ; Check that we load all of x, y, and z before the call.

 define i32 @three_args(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: three_args:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %edx
 ; CHECK-NEXT:    calll _addrof_i32_x3
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    retl
 entry:
   %z.addr = alloca i32, align 4
   %y.addr = alloca i32, align 4
   %x.addr = alloca i32, align 4
   store i32 %z, i32* %z.addr, align 4
   store i32 %y, i32* %y.addr, align 4
   store i32 %x, i32* %x.addr, align 4
   call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
   %s1 = add i32 %x, %y
   %sum = add i32 %s1, %z
   ret i32 %sum
 }

 define void @two_args_same_alloca(i32 %x, i32 %y) {
 ; CHECK-LABEL: two_args_same_alloca:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    retl
 entry:
   %x.addr = alloca i32
   store i32 %x, i32* %x.addr
   store i32 %y, i32* %x.addr
   call void @addrof_i32(i32* %x.addr)
   ret void
 }

 define void @avoid_byval(i32* byval(i32) %x) {
 ; CHECK-LABEL: avoid_byval:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    retl
 entry:
   %x.p.p = alloca i32*
   store i32* %x, i32** %x.p.p
   call void @addrof_i32(i32* %x)
   ret void
 }

 define void @avoid_inalloca(i32* inalloca(i32) %x) {
 ; CHECK-LABEL: avoid_inalloca:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    retl
 entry:
   %x.p.p = alloca i32*
   store i32* %x, i32** %x.p.p
   call void @addrof_i32(i32* %x)
   ret void
 }

 define void @avoid_preallocated(i32* preallocated(i32) %x) {
 ; CHECK-LABEL: avoid_preallocated:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    popl %eax
 ; CHECK-NEXT:    retl
 entry:
   %x.p.p = alloca i32*
   store i32* %x, i32** %x.p.p
   call void @addrof_i32(i32* %x)
   ret void
 }

 ; Don't elide the copy when the alloca is escaped with a store.
 define void @escape_with_store(i32 %x) {
 ; CHECK-LABEL: escape_with_store:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    subl $8, %esp
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl %esp, %ecx
 ; CHECK-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $12, %esp
 ; CHECK-NEXT:    retl
   %x1 = alloca i32
   %x2 = alloca i32*
   store i32* %x1, i32** %x2
   %x3 = load i32*, i32** %x2
   store i32 0, i32* %x3
   store i32 %x, i32* %x1
   call void @addrof_i32(i32* %x1)
   ret void
 }

 ; This test case exposed issues with the use of TokenFactor.

 define void @sret_and_elide(i32* sret(i32) %sret, i32 %v) {
 ; CHECK-LABEL: sret_and_elide:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl %edi, (%esi)
 ; CHECK-NEXT:    movl %esi, %eax
 ; CHECK-NEXT:    popl %esi
 ; CHECK-NEXT:    popl %edi
 ; CHECK-NEXT:    retl
   %v.p = alloca i32
   store i32 %v, i32* %v.p
   call void @addrof_i32(i32* %v.p)
   store i32 %v, i32* %sret
   ret void
 }

 define void @avoid_partially_initialized_alloca(i32 %x) {
 ; CHECK-LABEL: avoid_partially_initialized_alloca:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushl %ebp
 ; CHECK-NEXT:    movl %esp, %ebp
 ; CHECK-NEXT:    andl $-8, %esp
 ; CHECK-NEXT:    subl $8, %esp
 ; CHECK-NEXT:    movl 8(%ebp), %eax
 ; CHECK-NEXT:    movl %eax, (%esp)
 ; CHECK-NEXT:    movl %esp, %eax
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    calll _addrof_i32
 ; CHECK-NEXT:    addl $4, %esp
 ; CHECK-NEXT:    movl %ebp, %esp
 ; CHECK-NEXT:    popl %ebp
 ; CHECK-NEXT:    retl
   %a = alloca i64
   %p = bitcast i64* %a to i32*
   store i32 %x, i32* %p
   call void @addrof_i32(i32* %p)
   ret void
 }

 ; Ensure no copy elision happens as the two i3 values fed into icmp may have
 ; garbage in the upper bits, a truncation is needed.

 define i1 @use_i3(i3 %a1, i3 %a2) {
 ; CHECK-LABEL: use_i3:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pushl %eax
 ; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; CHECK-NEXT:    andb $7, %al
 ; CHECK-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; CHECK-NEXT:    andb $7, %cl
 ; CHECK-NEXT:    movb %cl, {{[0-9]+}}(%esp)
 ; CHECK-NEXT:    cmpb %cl, %al
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    popl %ecx
 ; CHECK-NEXT:    retl
   %tmp = alloca i3
   store i3 %a2, i3* %tmp
   %val = load i3, i3* %tmp
   %res = icmp eq i3 %a1, %val
   ret i1 %res
 }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=i686-windows < %s \| FileCheck %s

	declare void @addrof_i1(i1*)
	declare void @addrof_i32(i32*)
	declare void @addrof_i64(i64*)
	declare void @addrof_i128(i128*)
	declare void @addrof_i32_x3(i32, i32, i32*)

	define void @simple(i32 %x) {
	; CHECK-LABEL: simple:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: retl
	entry:
	%x.addr = alloca i32
	store i32 %x, i32* %x.addr
	call void @addrof_i32(i32* %x.addr)
	ret void
	}

	; We need to load %x before calling addrof_i32 now because it could mutate %x in
	; place.

	define i32 @use_arg(i32 %x) {
	; CHECK-LABEL: use_arg:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %esi
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl %esi, %eax
	; CHECK-NEXT: popl %esi
	; CHECK-NEXT: retl
	entry:
	%x.addr = alloca i32
	store i32 %x, i32* %x.addr
	call void @addrof_i32(i32* %x.addr)
	ret i32 %x
	}

	; We won't copy elide for types needing legalization such as i64 or i1.

	define i64 @split_i64(i64 %x) {
	; CHECK-LABEL: split_i64:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %edi
	; CHECK-NEXT: pushl %esi
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i64
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl %esi, %eax
	; CHECK-NEXT: movl %edi, %edx
	; CHECK-NEXT: popl %esi
	; CHECK-NEXT: popl %edi
	; CHECK-NEXT: retl
	entry:
	%x.addr = alloca i64, align 4
	store i64 %x, i64* %x.addr, align 4
	call void @addrof_i64(i64* %x.addr)
	ret i64 %x
	}

	define i1 @i1_arg(i1 %x) {
	; CHECK-LABEL: i1_arg:
	; CHECK: # %bb.0:
	; CHECK-NEXT: pushl %ebx
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl
	; CHECK-NEXT: movl %ebx, %eax
	; CHECK-NEXT: andb $1, %al
	; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp)
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i1
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl %ebx, %eax
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: popl %ebx
	; CHECK-NEXT: retl
	%x.addr = alloca i1
	store i1 %x, i1* %x.addr
	call void @addrof_i1(i1* %x.addr)
	ret i1 %x
	}

	; We can't copy elide when an i64 is split between registers and memory in a
	; fastcc function.

	define fastcc i64 @fastcc_split_i64(i64* %p, i64 %x) {
	; CHECK-LABEL: fastcc_split_i64:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %edi
	; CHECK-NEXT: pushl %esi
	; CHECK-NEXT: subl $8, %esp
	; CHECK-NEXT: movl %edx, %esi
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
	; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp)
	; CHECK-NEXT: movl %edx, (%esp)
	; CHECK-NEXT: movl %esp, %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i64
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl %esi, %eax
	; CHECK-NEXT: movl %edi, %edx
	; CHECK-NEXT: addl $8, %esp
	; CHECK-NEXT: popl %esi
	; CHECK-NEXT: popl %edi
	; CHECK-NEXT: retl
	entry:
	%x.addr = alloca i64, align 4
	store i64 %x, i64* %x.addr, align 4
	call void @addrof_i64(i64* %x.addr)
	ret i64 %x
	}

	; We can't copy elide when it would reduce the user requested alignment.

	define void @high_alignment(i32 %x) {
	; CHECK-LABEL: high_alignment:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %ebp
	; CHECK-NEXT: movl %esp, %ebp
	; CHECK-NEXT: andl $-128, %esp
	; CHECK-NEXT: subl $128, %esp
	; CHECK-NEXT: movl 8(%ebp), %eax
	; CHECK-NEXT: movl %eax, (%esp)
	; CHECK-NEXT: movl %esp, %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl %ebp, %esp
	; CHECK-NEXT: popl %ebp
	; CHECK-NEXT: retl
	entry:
	%x.p = alloca i32, align 128
	store i32 %x, i32* %x.p
	call void @addrof_i32(i32* %x.p)
	ret void
	}

	; We can't copy elide when it would reduce the ABI required alignment.
	; FIXME: We should lower the ABI alignment of i64 on Windows, since MSVC
	; doesn't guarantee it.

	define void @abi_alignment(i64 %x) {
	; CHECK-LABEL: abi_alignment:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %ebp
	; CHECK-NEXT: movl %esp, %ebp
	; CHECK-NEXT: andl $-8, %esp
	; CHECK-NEXT: subl $8, %esp
	; CHECK-NEXT: movl 8(%ebp), %eax
	; CHECK-NEXT: movl 12(%ebp), %ecx
	; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
	; CHECK-NEXT: movl %eax, (%esp)
	; CHECK-NEXT: movl %esp, %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i64
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl %ebp, %esp
	; CHECK-NEXT: popl %ebp
	; CHECK-NEXT: retl
	entry:
	%x.p = alloca i64
	store i64 %x, i64* %x.p
	call void @addrof_i64(i64* %x.p)
	ret void
	}

	; The code we generate for this is unimportant. This is mostly a crash test.

	define void @split_i128(i128* %sret, i128 %x) {
	; CHECK-LABEL: split_i128:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %ebp
	; CHECK-NEXT: movl %esp, %ebp
	; CHECK-NEXT: pushl %ebx
	; CHECK-NEXT: pushl %edi
	; CHECK-NEXT: pushl %esi
	; CHECK-NEXT: andl $-8, %esp
	; CHECK-NEXT: subl $32, %esp
	; CHECK-NEXT: movl 12(%ebp), %eax
	; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
	; CHECK-NEXT: movl 16(%ebp), %ebx
	; CHECK-NEXT: movl 20(%ebp), %esi
	; CHECK-NEXT: movl 24(%ebp), %edi
	; CHECK-NEXT: movl %edi, {{[0-9]+}}(%esp)
	; CHECK-NEXT: movl %esi, {{[0-9]+}}(%esp)
	; CHECK-NEXT: movl %ebx, {{[0-9]+}}(%esp)
	; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i128
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl 8(%ebp), %eax
	; CHECK-NEXT: movl %edi, 12(%eax)
	; CHECK-NEXT: movl %esi, 8(%eax)
	; CHECK-NEXT: movl %ebx, 4(%eax)
	; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
	; CHECK-NEXT: movl %ecx, (%eax)
	; CHECK-NEXT: leal -12(%ebp), %esp
	; CHECK-NEXT: popl %esi
	; CHECK-NEXT: popl %edi
	; CHECK-NEXT: popl %ebx
	; CHECK-NEXT: popl %ebp
	; CHECK-NEXT: retl
	entry:
	%x.addr = alloca i128
	store i128 %x, i128* %x.addr
	call void @addrof_i128(i128* %x.addr)
	store i128 %x, i128* %sret
	ret void
	}

	; Check that we load all of x, y, and z before the call.

	define i32 @three_args(i32 %x, i32 %y, i32 %z) {
	; CHECK-LABEL: three_args:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %esi
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
	; CHECK-NEXT: addl {{[0-9]+}}(%esp), %esi
	; CHECK-NEXT: addl {{[0-9]+}}(%esp), %esi
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %ecx
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: pushl %ecx
	; CHECK-NEXT: pushl %edx
	; CHECK-NEXT: calll _addrof_i32_x3
	; CHECK-NEXT: addl $12, %esp
	; CHECK-NEXT: movl %esi, %eax
	; CHECK-NEXT: popl %esi
	; CHECK-NEXT: retl
	entry:
	%z.addr = alloca i32, align 4
	%y.addr = alloca i32, align 4
	%x.addr = alloca i32, align 4
	store i32 %z, i32* %z.addr, align 4
	store i32 %y, i32* %y.addr, align 4
	store i32 %x, i32* %x.addr, align 4
	call void @addrof_i32_x3(i32* %x.addr, i32* %y.addr, i32* %z.addr)
	%s1 = add i32 %x, %y
	%sum = add i32 %s1, %z
	ret i32 %sum
	}

	define void @two_args_same_alloca(i32 %x, i32 %y) {
	; CHECK-LABEL: two_args_same_alloca:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: retl
	entry:
	%x.addr = alloca i32
	store i32 %x, i32* %x.addr
	store i32 %y, i32* %x.addr
	call void @addrof_i32(i32* %x.addr)
	ret void
	}

	define void @avoid_byval(i32* byval(i32) %x) {
	; CHECK-LABEL: avoid_byval:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: movl %eax, (%esp)
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: popl %eax
	; CHECK-NEXT: retl
	entry:
	%x.p.p = alloca i32*
	store i32* %x, i32** %x.p.p
	call void @addrof_i32(i32* %x)
	ret void
	}

	define void @avoid_inalloca(i32* inalloca(i32) %x) {
	; CHECK-LABEL: avoid_inalloca:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: movl %eax, (%esp)
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: popl %eax
	; CHECK-NEXT: retl
	entry:
	%x.p.p = alloca i32*
	store i32* %x, i32** %x.p.p
	call void @addrof_i32(i32* %x)
	ret void
	}

	define void @avoid_preallocated(i32* preallocated(i32) %x) {
	; CHECK-LABEL: avoid_preallocated:
	; CHECK: # %bb.0: # %entry
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: movl %eax, (%esp)
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: popl %eax
	; CHECK-NEXT: retl
	entry:
	%x.p.p = alloca i32*
	store i32* %x, i32** %x.p.p
	call void @addrof_i32(i32* %x)
	ret void
	}

	; Don't elide the copy when the alloca is escaped with a store.
	define void @escape_with_store(i32 %x) {
	; CHECK-LABEL: escape_with_store:
	; CHECK: # %bb.0:
	; CHECK-NEXT: subl $8, %esp
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: movl %esp, %ecx
	; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
	; CHECK-NEXT: movl %eax, (%esp)
	; CHECK-NEXT: pushl %ecx
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $12, %esp
	; CHECK-NEXT: retl
	%x1 = alloca i32
	%x2 = alloca i32*
	store i32* %x1, i32** %x2
	%x3 = load i32, i32* %x2
	store i32 0, i32* %x3
	store i32 %x, i32* %x1
	call void @addrof_i32(i32* %x1)
	ret void
	}

	; This test case exposed issues with the use of TokenFactor.

	define void @sret_and_elide(i32* sret(i32) %sret, i32 %v) {
	; CHECK-LABEL: sret_and_elide:
	; CHECK: # %bb.0:
	; CHECK-NEXT: pushl %edi
	; CHECK-NEXT: pushl %esi
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
	; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi
	; CHECK-NEXT: leal {{[0-9]+}}(%esp), %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl %edi, (%esi)
	; CHECK-NEXT: movl %esi, %eax
	; CHECK-NEXT: popl %esi
	; CHECK-NEXT: popl %edi
	; CHECK-NEXT: retl
	%v.p = alloca i32
	store i32 %v, i32* %v.p
	call void @addrof_i32(i32* %v.p)
	store i32 %v, i32* %sret
	ret void
	}

	define void @avoid_partially_initialized_alloca(i32 %x) {
	; CHECK-LABEL: avoid_partially_initialized_alloca:
	; CHECK: # %bb.0:
	; CHECK-NEXT: pushl %ebp
	; CHECK-NEXT: movl %esp, %ebp
	; CHECK-NEXT: andl $-8, %esp
	; CHECK-NEXT: subl $8, %esp
	; CHECK-NEXT: movl 8(%ebp), %eax
	; CHECK-NEXT: movl %eax, (%esp)
	; CHECK-NEXT: movl %esp, %eax
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: calll _addrof_i32
	; CHECK-NEXT: addl $4, %esp
	; CHECK-NEXT: movl %ebp, %esp
	; CHECK-NEXT: popl %ebp
	; CHECK-NEXT: retl
	%a = alloca i64
	%p = bitcast i64* %a to i32*
	store i32 %x, i32* %p
	call void @addrof_i32(i32* %p)
	ret void
	}

	; Ensure no copy elision happens as the two i3 values fed into icmp may have
	; garbage in the upper bits, a truncation is needed.

	define i1 @use_i3(i3 %a1, i3 %a2) {
	; CHECK-LABEL: use_i3:
	; CHECK: # %bb.0:
	; CHECK-NEXT: pushl %eax
	; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
	; CHECK-NEXT: andb $7, %al
	; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl
	; CHECK-NEXT: andb $7, %cl
	; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp)
	; CHECK-NEXT: cmpb %cl, %al
	; CHECK-NEXT: sete %al
	; CHECK-NEXT: popl %ecx
	; CHECK-NEXT: retl
	%tmp = alloca i3
	store i3 %a2, i3* %tmp
	%val = load i3, i3* %tmp
	%res = icmp eq i3 %a1, %val
	ret i1 %res
	}