; test/CodeGen/AArch64/sincos-stack-slots.ll - llvm-project/llvm - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s

 ; This file tests eliding stack slots when lowering the FSINCOS ISD node.

 ; Both sin(%x) and cos(%x) are returned by value, so neither result has a
 ; caller-provided destination. The expected code passes two stack addresses to
 ; sincosf (sp+12 in x0, sp+8 in x1) and reloads both results with one ldp.
 define { float, float } @sincos_f32_value_return(float %x) {
 ; CHECK-LABEL: sincos_f32_value_return:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    add x0, sp, #12
 ; CHECK-NEXT:    add x1, sp, #8
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldp s1, s0, [sp, #8]
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call float @llvm.sin.f32(float %x)
   %cos = tail call float @llvm.cos.f32(float %x)
   %ret_0 = insertvalue { float, float } poison, float %sin, 0
   %ret_1 = insertvalue { float, float } %ret_0, float %cos, 1
   ret { float, float } %ret_1
 }

 ; Both results are stored through noalias pointers. The expected code uses no
 ; temporary result slots: there is no pointer setup before the call to sincosf
 ; and no load or store of the results after it (only x30 is spilled).
 define void @sincos_f32_ptr_return(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
 ; CHECK-LABEL: sincos_f32_ptr_return:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call float @llvm.sin.f32(float %x)
   %cos = tail call float @llvm.cos.f32(float %x)
   store float %sin, ptr %out_sin, align 4
   store float %cos, ptr %out_cos, align 4
   ret void
 }

 ; sin is stored through %out_sin while cos is returned by value. In the
 ; expected code only cos gets a stack temporary: x1 is pointed at sp+12 and s0
 ; is reloaded from there after the call; x0 is not modified before the call.
 define float @sincos_f32_mixed_return(float %x, ptr %out_sin) {
 ; CHECK-LABEL: sincos_f32_mixed_return:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    add x1, sp, #12
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr s0, [sp, #12]
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call float @llvm.sin.f32(float %x)
   %cos = tail call float @llvm.cos.f32(float %x)
   store float %sin, ptr %out_sin, align 4
   ret float %cos
 }

 ; f64 variant of the by-value case: the library call is sincos, with the two
 ; result temporaries at sp+24 (x0) and sp+8 (x1). They are not adjacent (x30
 ; is spilled at sp+16), so the results are reloaded with two separate ldr.
 define { double, double } @sincos_f64_value_return(double %x) {
 ; CHECK-LABEL: sincos_f64_value_return:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #32
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    add x0, sp, #24
 ; CHECK-NEXT:    add x1, sp, #8
 ; CHECK-NEXT:    bl sincos
 ; CHECK-NEXT:    ldr d0, [sp, #24]
 ; CHECK-NEXT:    ldr d1, [sp, #8]
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #32
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call double @llvm.sin.f64(double %x)
   %cos = tail call double @llvm.cos.f64(double %x)
   %ret_0 = insertvalue { double, double } poison, double %sin, 0
   %ret_1 = insertvalue { double, double } %ret_0, double %cos, 1
   ret { double, double } %ret_1
 }

 ; f64 variant of the noalias pointer case: both results are stored with their
 ; natural 8-byte alignment, and the expected code calls sincos with no
 ; temporary result slots and no loads or stores after the call.
 define void @sincos_f64_ptr_return(double %x, ptr noalias %out_sin, ptr noalias %out_cos) {
 ; CHECK-LABEL: sincos_f64_ptr_return:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    bl sincos
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call double @llvm.sin.f64(double %x)
   %cos = tail call double @llvm.cos.f64(double %x)
   store double %sin, ptr %out_sin, align 8
   store double %cos, ptr %out_cos, align 8
   ret void
 }

 ; f64 variant of the mixed case: sin is stored through %out_sin and cos is
 ; returned by value. Only cos gets a stack temporary (sp+8, passed in x1 and
 ; reloaded into d0); x0 is not modified before the call to sincos.
 define double @sincos_f64_mixed_return(double %x, ptr %out_sin) {
 ; CHECK-LABEL: sincos_f64_mixed_return:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    add x1, sp, #8
 ; CHECK-NEXT:    bl sincos
 ; CHECK-NEXT:    ldr d0, [sp, #8]
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call double @llvm.sin.f64(double %x)
   %cos = tail call double @llvm.cos.f64(double %x)
   store double %sin, ptr %out_sin, align 8
   ret double %cos
 }

 ; Here %out_sin and %out_cos may alias so we can't replace both stores with the
 ; call to sincosf (as the order of stores in sincosf is not defined).
 ; In the expected code only one store is folded: x0 is passed through
 ; unchanged, while %out_cos is saved in x19, cos is written to a stack
 ; temporary at sp+12, and then copied to [x19] after the call.
 define void @sincos_may_alias(float %x, ptr %out_sin, ptr %out_cos) {
 ; CHECK-LABEL: sincos_may_alias:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #32
 ; CHECK-NEXT:    stp x30, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x19, x1
 ; CHECK-NEXT:    add x1, sp, #12
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr s0, [sp, #12]
 ; CHECK-NEXT:    str s0, [x19]
 ; CHECK-NEXT:    ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #32
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call float @llvm.sin.f32(float %x)
   %cos = tail call float @llvm.cos.f32(float %x)
   store float %sin, ptr %out_sin, align 4
   store float %cos, ptr %out_cos, align 4
   ret void
 }

 ; Here %out is used for both sin and cos (with the final value stored being cos).
 ; The expected code passes %out as the second sincosf pointer (x1) and a stack
 ; temporary at sp+12 as the first (x0); the returned %reload value is read back
 ; from that temporary. This function has no explicit entry label, which is why
 ; the block header assertion below lacks the "// %entry" suffix.
 define float @sincos_multiple_uses(float %x, ptr %out) {
 ; CHECK-LABEL: sincos_multiple_uses:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    .cfi_offset w30, -16
 ; CHECK-NEXT:    mov x1, x0
 ; CHECK-NEXT:    add x0, sp, #12
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr s0, [sp, #12]
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %sin = call float @llvm.sin.f32(float %x)
   store float %sin, ptr %out, align 4
   %reload = load float, ptr %out, align 4
   %cos = call float @llvm.cos.f32(float %x)
   store float %cos, ptr %out, align 4
   ret float %reload
 }

 ; Negative test. We can't fold volatile stores into the library call.
 ; In the expected code both destination pointers are saved in x20/x19, sincosf
 ; writes to stack temporaries (sp+12 and sp+8), and the two results are stored
 ; to the destinations with explicit str instructions after the call.
 define void @sincos_volatile_result_stores(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
 ; CHECK-LABEL: sincos_volatile_result_stores:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    mov x19, x1
 ; CHECK-NEXT:    mov x20, x0
 ; CHECK-NEXT:    add x0, sp, #12
 ; CHECK-NEXT:    add x1, sp, #8
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldp s1, s0, [sp, #8]
 ; CHECK-NEXT:    str s0, [x20]
 ; CHECK-NEXT:    str s1, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call float @llvm.sin.f32(float %x)
   %cos = tail call float @llvm.cos.f32(float %x)
   store volatile float %sin, ptr %out_sin, align 4
   store volatile float %cos, ptr %out_cos, align 4
   ret void
 }

 ; Negative test. We can't fold atomic stores into the library call.
 ; In the expected code both destination pointers are saved in x20/x19, sincosf
 ; writes to stack temporaries (sp+12 and sp+8), and each result is then copied
 ; to its destination through the integer register w8.
 define void @sincos_atomic_result_stores(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
 ; CHECK-LABEL: sincos_atomic_result_stores:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    mov x19, x1
 ; CHECK-NEXT:    mov x20, x0
 ; CHECK-NEXT:    add x0, sp, #12
 ; CHECK-NEXT:    add x1, sp, #8
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldr w8, [sp, #12]
 ; CHECK-NEXT:    str w8, [x20]
 ; CHECK-NEXT:    ldr w8, [sp, #8]
 ; CHECK-NEXT:    str w8, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call float @llvm.sin.f32(float %x)
   %cos = tail call float @llvm.cos.f32(float %x)
   store atomic float %sin, ptr %out_sin unordered, align 4
   store atomic float %cos, ptr %out_cos unordered, align 4
   ret void
 }

 ; Negative test. We can't fold misaligned stores into the library call.
 ; The double results are stored with only 4-byte alignment (align 4). In the
 ; expected code sincos writes to stack temporaries (sp+24 and sp+8) and the
 ; results are stored to the saved pointers (x20/x19) after the call.
 define void @sincos_misaligned_result_stores(double %x, ptr noalias %out_sin, ptr noalias %out_cos) {
 ; CHECK-LABEL: sincos_misaligned_result_stores:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    sub sp, sp, #48
 ; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -32
 ; CHECK-NEXT:    mov x19, x1
 ; CHECK-NEXT:    mov x20, x0
 ; CHECK-NEXT:    add x0, sp, #24
 ; CHECK-NEXT:    add x1, sp, #8
 ; CHECK-NEXT:    bl sincos
 ; CHECK-NEXT:    ldr d0, [sp, #24]
 ; CHECK-NEXT:    ldr d1, [sp, #8]
 ; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
 ; CHECK-NEXT:    str d0, [x20]
 ; CHECK-NEXT:    str d1, [x19]
 ; CHECK-NEXT:    ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #48
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call double @llvm.sin.f64(double %x)
   %cos = tail call double @llvm.cos.f64(double %x)
   store double %sin, ptr %out_sin, align 4
   store double %cos, ptr %out_cos, align 4
   ret void
 }

 ; External function used below to place an unrelated call between the sin/cos
 ; intrinsic calls and the stores of their results.
 declare void @foo(ptr, ptr)

 ; The call to @foo sits between the sin/cos calls and the result stores. The
 ; expected code still folds both stores: %x is kept live across foo in s8 and
 ; the pointers in x20/x19, then sincosf is called after foo with the original
 ; %a/%b pointers and no result loads or stores follow it.
 define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias %b) {
 ; CHECK-LABEL: can_fold_with_call_in_chain:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    str d8, [sp, #-32]! // 8-byte Folded Spill
 ; CHECK-NEXT:    str x30, [sp, #8] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset w19, -8
 ; CHECK-NEXT:    .cfi_offset w20, -16
 ; CHECK-NEXT:    .cfi_offset w30, -24
 ; CHECK-NEXT:    .cfi_offset b8, -32
 ; CHECK-NEXT:    mov x19, x1
 ; CHECK-NEXT:    mov x20, x0
 ; CHECK-NEXT:    fmov s8, s0
 ; CHECK-NEXT:    bl foo
 ; CHECK-NEXT:    fmov s0, s8
 ; CHECK-NEXT:    mov x0, x20
 ; CHECK-NEXT:    mov x1, x19
 ; CHECK-NEXT:    bl sincosf
 ; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldr x30, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT:    ldr d8, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
   %sin = tail call float @llvm.sin.f32(float %x)
   %cos = tail call float @llvm.cos.f32(float %x)
   call void @foo(ptr %a, ptr %b)
   store float %sin, ptr %a, align 4
   store float %cos, ptr %b, align 4
   ret void
 }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
	; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s

	; This file tests eliding stack slots when lowering the FSINCOS ISD node.

	; Both sin(%x) and cos(%x) are returned by value, so neither result has a
	; caller-provided destination. The expected code passes two stack addresses to
	; sincosf (sp+12 in x0, sp+8 in x1) and reloads both results with one ldp.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define { float, float } @sincos_f32_value_return(float %x) {
	; CHECK-LABEL: sincos_f32_value_return:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset w30, -16
	; CHECK-NEXT: add x0, sp, #12
	; CHECK-NEXT: add x1, sp, #8
	; CHECK-NEXT: bl sincosf
	; CHECK-NEXT: ldp s1, s0, [sp, #8]
	; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	entry:
	%sin = tail call float @llvm.sin.f32(float %x)
	%cos = tail call float @llvm.cos.f32(float %x)
	%ret_0 = insertvalue { float, float } poison, float %sin, 0
	%ret_1 = insertvalue { float, float } %ret_0, float %cos, 1
	ret { float, float } %ret_1
	}

	; Both results are stored through noalias pointers. The expected code uses no
	; temporary result slots: there is no pointer setup before the call to sincosf
	; and no load or store of the results after it (only x30 is spilled).
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define void @sincos_f32_ptr_return(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
	; CHECK-LABEL: sincos_f32_ptr_return:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset w30, -16
	; CHECK-NEXT: bl sincosf
	; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	entry:
	%sin = tail call float @llvm.sin.f32(float %x)
	%cos = tail call float @llvm.cos.f32(float %x)
	store float %sin, ptr %out_sin, align 4
	store float %cos, ptr %out_cos, align 4
	ret void
	}

	; sin is stored through %out_sin while cos is returned by value. In the
	; expected code only cos gets a stack temporary: x1 is pointed at sp+12 and s0
	; is reloaded from there after the call; x0 is not modified before the call.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define float @sincos_f32_mixed_return(float %x, ptr %out_sin) {
	; CHECK-LABEL: sincos_f32_mixed_return:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset w30, -16
	; CHECK-NEXT: add x1, sp, #12
	; CHECK-NEXT: bl sincosf
	; CHECK-NEXT: ldr s0, [sp, #12]
	; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	entry:
	%sin = tail call float @llvm.sin.f32(float %x)
	%cos = tail call float @llvm.cos.f32(float %x)
	store float %sin, ptr %out_sin, align 4
	ret float %cos
	}

	; f64 variant of the by-value case: the library call is sincos, with the two
	; result temporaries at sp+24 (x0) and sp+8 (x1). They are not adjacent (x30
	; is spilled at sp+16), so the results are reloaded with two separate ldr.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define { double, double } @sincos_f64_value_return(double %x) {
	; CHECK-LABEL: sincos_f64_value_return:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: sub sp, sp, #32
	; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 32
	; CHECK-NEXT: .cfi_offset w30, -16
	; CHECK-NEXT: add x0, sp, #24
	; CHECK-NEXT: add x1, sp, #8
	; CHECK-NEXT: bl sincos
	; CHECK-NEXT: ldr d0, [sp, #24]
	; CHECK-NEXT: ldr d1, [sp, #8]
	; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
	; CHECK-NEXT: add sp, sp, #32
	; CHECK-NEXT: ret
	entry:
	%sin = tail call double @llvm.sin.f64(double %x)
	%cos = tail call double @llvm.cos.f64(double %x)
	%ret_0 = insertvalue { double, double } poison, double %sin, 0
	%ret_1 = insertvalue { double, double } %ret_0, double %cos, 1
	ret { double, double } %ret_1
	}

	; f64 variant of the noalias pointer case: both results are stored with their
	; natural 8-byte alignment, and the expected code calls sincos with no
	; temporary result slots and no loads or stores after the call.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define void @sincos_f64_ptr_return(double %x, ptr noalias %out_sin, ptr noalias %out_cos) {
	; CHECK-LABEL: sincos_f64_ptr_return:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset w30, -16
	; CHECK-NEXT: bl sincos
	; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	entry:
	%sin = tail call double @llvm.sin.f64(double %x)
	%cos = tail call double @llvm.cos.f64(double %x)
	store double %sin, ptr %out_sin, align 8
	store double %cos, ptr %out_cos, align 8
	ret void
	}

	; f64 variant of the mixed case: sin is stored through %out_sin and cos is
	; returned by value. Only cos gets a stack temporary (sp+8, passed in x1 and
	; reloaded into d0); x0 is not modified before the call to sincos.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define double @sincos_f64_mixed_return(double %x, ptr %out_sin) {
	; CHECK-LABEL: sincos_f64_mixed_return:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset w30, -16
	; CHECK-NEXT: add x1, sp, #8
	; CHECK-NEXT: bl sincos
	; CHECK-NEXT: ldr d0, [sp, #8]
	; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	entry:
	%sin = tail call double @llvm.sin.f64(double %x)
	%cos = tail call double @llvm.cos.f64(double %x)
	store double %sin, ptr %out_sin, align 8
	ret double %cos
	}

	; Here %out_sin and %out_cos may alias so we can't replace both stores with the
	; call to sincosf (as the order of stores in sincosf is not defined).
	; In the expected code only one store is folded: x0 is passed through
	; unchanged, while %out_cos is saved in x19, cos is written to a stack
	; temporary at sp+12, and then copied to [x19] after the call.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define void @sincos_may_alias(float %x, ptr %out_sin, ptr %out_cos) {
	; CHECK-LABEL: sincos_may_alias:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: sub sp, sp, #32
	; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 32
	; CHECK-NEXT: .cfi_offset w19, -8
	; CHECK-NEXT: .cfi_offset w30, -16
	; CHECK-NEXT: mov x19, x1
	; CHECK-NEXT: add x1, sp, #12
	; CHECK-NEXT: bl sincosf
	; CHECK-NEXT: ldr s0, [sp, #12]
	; CHECK-NEXT: str s0, [x19]
	; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
	; CHECK-NEXT: add sp, sp, #32
	; CHECK-NEXT: ret
	entry:
	%sin = tail call float @llvm.sin.f32(float %x)
	%cos = tail call float @llvm.cos.f32(float %x)
	store float %sin, ptr %out_sin, align 4
	store float %cos, ptr %out_cos, align 4
	ret void
	}

	; Here %out is used for both sin and cos (with the final value stored being cos).
	; The expected code passes %out as the second sincosf pointer (x1) and a stack
	; temporary at sp+12 as the first (x0); the returned %reload value is read back
	; from that temporary. This function has no explicit entry label, which is why
	; the block header assertion below lacks the "// %entry" suffix.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define float @sincos_multiple_uses(float %x, ptr %out) {
	; CHECK-LABEL: sincos_multiple_uses:
	; CHECK: // %bb.0:
	; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 16
	; CHECK-NEXT: .cfi_offset w30, -16
	; CHECK-NEXT: mov x1, x0
	; CHECK-NEXT: add x0, sp, #12
	; CHECK-NEXT: bl sincosf
	; CHECK-NEXT: ldr s0, [sp, #12]
	; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	%sin = call float @llvm.sin.f32(float %x)
	store float %sin, ptr %out, align 4
	%reload = load float, ptr %out, align 4
	%cos = call float @llvm.cos.f32(float %x)
	store float %cos, ptr %out, align 4
	ret float %reload
	}

	; Negative test. We can't fold volatile stores into the library call.
	; In the expected code both destination pointers are saved in x20/x19, sincosf
	; writes to stack temporaries (sp+12 and sp+8), and the two results are stored
	; to the destinations with explicit str instructions after the call.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define void @sincos_volatile_result_stores(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
	; CHECK-LABEL: sincos_volatile_result_stores:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
	; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 32
	; CHECK-NEXT: .cfi_offset w19, -8
	; CHECK-NEXT: .cfi_offset w20, -16
	; CHECK-NEXT: .cfi_offset w30, -32
	; CHECK-NEXT: mov x19, x1
	; CHECK-NEXT: mov x20, x0
	; CHECK-NEXT: add x0, sp, #12
	; CHECK-NEXT: add x1, sp, #8
	; CHECK-NEXT: bl sincosf
	; CHECK-NEXT: ldp s1, s0, [sp, #8]
	; CHECK-NEXT: str s0, [x20]
	; CHECK-NEXT: str s1, [x19]
	; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
	; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	entry:
	%sin = tail call float @llvm.sin.f32(float %x)
	%cos = tail call float @llvm.cos.f32(float %x)
	store volatile float %sin, ptr %out_sin, align 4
	store volatile float %cos, ptr %out_cos, align 4
	ret void
	}

	; Negative test. We can't fold atomic stores into the library call.
	; In the expected code both destination pointers are saved in x20/x19, sincosf
	; writes to stack temporaries (sp+12 and sp+8), and each result is then copied
	; to its destination through the integer register w8.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define void @sincos_atomic_result_stores(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
	; CHECK-LABEL: sincos_atomic_result_stores:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
	; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 32
	; CHECK-NEXT: .cfi_offset w19, -8
	; CHECK-NEXT: .cfi_offset w20, -16
	; CHECK-NEXT: .cfi_offset w30, -32
	; CHECK-NEXT: mov x19, x1
	; CHECK-NEXT: mov x20, x0
	; CHECK-NEXT: add x0, sp, #12
	; CHECK-NEXT: add x1, sp, #8
	; CHECK-NEXT: bl sincosf
	; CHECK-NEXT: ldr w8, [sp, #12]
	; CHECK-NEXT: str w8, [x20]
	; CHECK-NEXT: ldr w8, [sp, #8]
	; CHECK-NEXT: str w8, [x19]
	; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
	; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	entry:
	%sin = tail call float @llvm.sin.f32(float %x)
	%cos = tail call float @llvm.cos.f32(float %x)
	store atomic float %sin, ptr %out_sin unordered, align 4
	store atomic float %cos, ptr %out_cos unordered, align 4
	ret void
	}

	; Negative test. We can't fold misaligned stores into the library call.
	; The double results are stored with only 4-byte alignment (align 4). In the
	; expected code sincos writes to stack temporaries (sp+24 and sp+8) and the
	; results are stored to the saved pointers (x20/x19) after the call.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define void @sincos_misaligned_result_stores(double %x, ptr noalias %out_sin, ptr noalias %out_cos) {
	; CHECK-LABEL: sincos_misaligned_result_stores:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: sub sp, sp, #48
	; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
	; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 48
	; CHECK-NEXT: .cfi_offset w19, -8
	; CHECK-NEXT: .cfi_offset w20, -16
	; CHECK-NEXT: .cfi_offset w30, -32
	; CHECK-NEXT: mov x19, x1
	; CHECK-NEXT: mov x20, x0
	; CHECK-NEXT: add x0, sp, #24
	; CHECK-NEXT: add x1, sp, #8
	; CHECK-NEXT: bl sincos
	; CHECK-NEXT: ldr d0, [sp, #24]
	; CHECK-NEXT: ldr d1, [sp, #8]
	; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
	; CHECK-NEXT: str d0, [x20]
	; CHECK-NEXT: str d1, [x19]
	; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
	; CHECK-NEXT: add sp, sp, #48
	; CHECK-NEXT: ret
	entry:
	%sin = tail call double @llvm.sin.f64(double %x)
	%cos = tail call double @llvm.cos.f64(double %x)
	store double %sin, ptr %out_sin, align 4
	store double %cos, ptr %out_cos, align 4
	ret void
	}

	; External function used below to place an unrelated call between the sin/cos
	; intrinsic calls and the stores of their results.
	declare void @foo(ptr, ptr)

	; The call to @foo sits between the sin/cos calls and the result stores. The
	; expected code still folds both stores: %x is kept live across foo in s8 and
	; the pointers in x20/x19, then sincosf is called after foo with the original
	; %a/%b pointers and no result loads or stores follow it.
	; NOTE(review): a function with this name is also defined earlier in this
	; file; confirm which copy is meant to be kept.
	define void @can_fold_with_call_in_chain(float %x, ptr noalias %a, ptr noalias %b) {
	; CHECK-LABEL: can_fold_with_call_in_chain:
	; CHECK: // %bb.0: // %entry
	; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
	; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
	; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
	; CHECK-NEXT: .cfi_def_cfa_offset 32
	; CHECK-NEXT: .cfi_offset w19, -8
	; CHECK-NEXT: .cfi_offset w20, -16
	; CHECK-NEXT: .cfi_offset w30, -24
	; CHECK-NEXT: .cfi_offset b8, -32
	; CHECK-NEXT: mov x19, x1
	; CHECK-NEXT: mov x20, x0
	; CHECK-NEXT: fmov s8, s0
	; CHECK-NEXT: bl foo
	; CHECK-NEXT: fmov s0, s8
	; CHECK-NEXT: mov x0, x20
	; CHECK-NEXT: mov x1, x19
	; CHECK-NEXT: bl sincosf
	; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
	; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
	; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
	; CHECK-NEXT: ret
	entry:
	%sin = tail call float @llvm.sin.f32(float %x)
	%cos = tail call float @llvm.cos.f32(float %x)
	call void @foo(ptr %a, ptr %b)
	store float %sin, ptr %a, align 4
	store float %cos, ptr %b, align 4
	ret void
	}