|  | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 | 
|  | ; RUN: llc -aarch64-streaming-hazard-size=0 < %s | FileCheck %s | 
|  |  | 
|  | target triple = "aarch64-unknown-unknown-eabi-elf" | 
|  |  | 
|  | ; This test verifies that call arguments and results are not coalesced | 
|  | ; with SVE vector registers by the coalescer, such that no 'mul vl' | 
|  | ; ldr/str pairs are generated in the streaming-mode-changing call | 
|  | ; sequence. | 
|  |  | 
|  | ; | 
|  | ; Scalar arguments | 
|  | ; | 
|  |  | 
; i8 scalar: %arg feeds both the call and an nxv16i8 insertelement. The copy
; into z0 (fmov s0, w0) is spilled to the SVE slot (str z0, [sp]) before
; smstop and reloaded only after smstart, so no SVE spill/fill sits inside
; the streaming-mode-changing window.
define void @dont_coalesce_arg_i8(i8 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    mov x19, x1
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl use_i8
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    st1b { z0.b }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = insertelement <vscale x 16 x i8> poison, i8 %arg, i32 0
call void @use_i8(i8 %arg)
store <vscale x 16 x i8> %vec, ptr %ptr
ret void
}
|  |  | 
; i16 scalar: same shape as the i8 case — z0 (holding %arg via fmov s0, w0)
; is spilled before smstop and reloaded after smstart; the nxv8i16 result is
; stored with st1h after the mode switch back.
define void @dont_coalesce_arg_i16(i16 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    mov x19, x1
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl use_i16
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = insertelement <vscale x 8 x i16> poison, i16 %arg, i32 0
call void @use_i16(i16 %arg)
store <vscale x 8 x i16> %vec, ptr %ptr
ret void
}
|  |  | 
; i32 scalar: z0 spill before smstop, reload after smstart; nxv4i32 result
; stored with st1w. The i32 call argument stays in w0 across the sequence.
define void @dont_coalesce_arg_i32(i32 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    mov x19, x1
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl use_i32
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = insertelement <vscale x 4 x i32> poison, i32 %arg, i32 0
call void @use_i32(i32 %arg)
store <vscale x 4 x i32> %vec, ptr %ptr
ret void
}
|  |  | 
; i64 scalar: the argument is moved into d0 (fmov d0, x0), spilled as z0
; before smstop, and reloaded after smstart; nxv2i64 stored with st1d.
define void @dont_coalesce_arg_i64(i64 %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    mov x19, x1
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl use_i64
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = insertelement <vscale x 2 x i64> poison, i64 %arg, i32 0
call void @use_i64(i64 %arg)
store <vscale x 2 x i64> %vec, ptr %ptr
ret void
}
|  |  | 
; f16 scalar: the FP argument arrives in h0, so it needs two slots — the SVE
; copy goes to [sp+16] (str z0), and the scalar h0 goes to [sp, #14] so it can
; be reloaded after smstop (FP regs don't survive the mode change) and passed
; to use_f16. Neither spill/fill is inside the smstop..bl..smstart sequence
; as a 'mul vl' access between the mode-change instructions.
define void @dont_coalesce_arg_f16(half %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
; CHECK-NEXT:    bl use_f16
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = insertelement <vscale x 8 x half> poison, half %arg, i32 0
call void @use_f16(half %arg)
store <vscale x 8 x half> %vec, ptr %ptr
ret void
}
|  |  | 
; f32 scalar: analogous to f16 — SVE copy of s0 spilled to [sp+16], the
; scalar s0 to [sp, #12], reloaded right after smstop for the call, with the
; SVE reload deferred to after smstart.
define void @dont_coalesce_arg_f32(float %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $z0
; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT:    bl use_f32
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = insertelement <vscale x 4 x float> poison, float %arg, i32 0
call void @use_f32(float %arg)
store <vscale x 4 x float> %vec, ptr %ptr
ret void
}
|  |  | 
; f64 scalar: SVE copy of d0 spilled to [sp+16], the scalar d0 to [sp, #8];
; d0 is reloaded after smstop for the call, z0 after smstart for the st1d.
define void @dont_coalesce_arg_f64(double %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    bl use_f64
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = insertelement <vscale x 2 x double> poison, double %arg, i32 0
call void @use_f64(double %arg)
store <vscale x 2 x double> %vec, ptr %ptr
ret void
}
|  |  | 
|  |  | 
|  | ; | 
|  | ; Single-element vector arguments | 
|  | ; | 
|  |  | 
; <1 x i8> vector: passed in d0. SVE copy spilled to [sp+16], scalar d0 to
; [sp, #8] and reloaded after smstop for the call. NOTE: the IR deliberately
; calls @use_v16i8 with a <1 x i8> operand — the callee is only a symbol
; here, and the CHECK lines match 'bl use_v16i8'.
define void @dont_coalesce_arg_v1i8(<1 x i8> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    bl use_v16i8
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1b { z0.b }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%elt = extractelement <1 x i8> %arg, i32 0
%vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
call void @use_v16i8(<1 x i8> %arg)
store <vscale x 16 x i8> %vec, ptr %ptr
ret void
}
|  |  | 
; <1 x i16> vector in d0: same two-slot spill pattern as v1i8 (z0 to [sp+16],
; d0 to [sp, #8]); calls @use_v8i16 with the <1 x i16> operand.
define void @dont_coalesce_arg_v1i16(<1 x i16> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    bl use_v8i16
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%elt = extractelement <1 x i16> %arg, i32 0
%vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
call void @use_v8i16(<1 x i16> %arg)
store <vscale x 8 x i16> %vec, ptr %ptr
ret void
}
|  |  | 
; <1 x i32> vector in d0: same two-slot spill pattern; calls @use_v4i32 with
; the <1 x i32> operand and stores the nxv4i32 result with st1w.
define void @dont_coalesce_arg_v1i32(<1 x i32> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    bl use_v4i32
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%elt = extractelement <1 x i32> %arg, i32 0
%vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
call void @use_v4i32(<1 x i32> %arg)
store <vscale x 4 x i32> %vec, ptr %ptr
ret void
}
|  |  | 
; <1 x i64> vector in d0: same two-slot spill pattern; calls @use_v2i64 with
; the <1 x i64> operand and stores the nxv2i64 result with st1d.
define void @dont_coalesce_arg_v1i64(<1 x i64> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    bl use_v2i64
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%elt = extractelement <1 x i64> %arg, i32 0
%vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
call void @use_v2i64(<1 x i64> %arg)
store <vscale x 2 x i64> %vec, ptr %ptr
ret void
}
|  |  | 
; <1 x half> vector in h0: like the f16 scalar case, two slots are used —
; z0 to [sp+16] and h0 to [sp, #14], the latter reloaded after smstop for
; the call to @use_v8f16.
define void @dont_coalesce_arg_v1f16(<1 x half> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $h0 killed $h0 killed $z0
; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
; CHECK-NEXT:    bl use_v8f16
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%elt = extractelement <1 x half> %arg, i32 0
%vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
call void @use_v8f16(<1 x half> %arg)
store <vscale x 8 x half> %vec, ptr %ptr
ret void
}
|  |  | 
; <1 x float> vector: passed in d0 (single-element FP vectors use the d
; register here); z0 spilled to [sp+16], d0 to [sp, #8], reloaded after
; smstop for the call to @use_v4f32.
define void @dont_coalesce_arg_v1f32(<1 x float> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    bl use_v4f32
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%elt = extractelement <1 x float> %arg, i32 0
%vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
call void @use_v4f32(<1 x float> %arg)
store <vscale x 4 x float> %vec, ptr %ptr
ret void
}
|  |  | 
; <1 x double> vector in d0: same two-slot spill pattern; calls @use_v2f64
; with the <1 x double> operand and stores the nxv2f64 result with st1d.
define void @dont_coalesce_arg_v1f64(<1 x double> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    bl use_v2f64
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%elt = extractelement <1 x double> %arg, i32 0
%vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
call void @use_v2f64(<1 x double> %arg)
store <vscale x 2 x double> %vec, ptr %ptr
ret void
}
|  |  | 
|  | ; | 
|  | ; Full vector arguments | 
|  | ; | 
|  |  | 
; <16 x i8> full vector in q0: inserted via llvm.vector.insert into nxv16i8.
; The z0 copy is spilled to [sp+16] and q0 to [sp], with q0 reloaded after
; smstop for the non-streaming call — the only z-register accesses are
; outside the smstop..smstart window.
define void @dont_coalesce_arg_v16i8(<16 x i8> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl use_v16i8
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1b { z0.b }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %arg, i64 0)
call void @use_v16i8(<16 x i8> %arg)
store <vscale x 16 x i8> %vec, ptr %ptr
ret void
}
|  |  | 
; <8 x i16> full vector in q0: same pattern as v16i8 — z0 spilled to
; [sp+16], q0 to [sp] and reloaded after smstop for the call to @use_v8i16;
; the nxv8i16 result is stored with st1h after smstart.
define void @dont_coalesce_arg_v8i16(<8 x i16> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl use_v8i16
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
%vec = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> %arg, i64 0)
call void @use_v8i16(<8 x i16> %arg)
store <vscale x 8 x i16> %vec, ptr %ptr
ret void
}
|  |  | 
; The fixed-width <4 x i32> argument crosses the smstop/smstart as a plain
; q-register spill/fill; the inserted scalable vector gets its own z-register
; spill at a register-based address (no 'mul vl' ldr/str for the argument).
define void @dont_coalesce_arg_v4i32(<4 x i32> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl use_v4i32
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> %arg, i64 0)
  call void @use_v4i32(<4 x i32> %arg)
  store <vscale x 4 x i32> %vec, ptr %ptr
  ret void
}
|  |  | 
; The fixed-width <2 x i64> argument crosses the smstop/smstart as a plain
; q-register spill/fill, kept separate from the z-register spill of the
; scalable insert result.
define void @dont_coalesce_arg_v2i64(<2 x i64> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl use_v2i64
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %arg, i64 0)
  call void @use_v2i64(<2 x i64> %arg)
  store <vscale x 2 x i64> %vec, ptr %ptr
  ret void
}
|  |  | 
; The fixed-width <8 x half> argument crosses the smstop/smstart as a plain
; q-register spill/fill, kept separate from the z-register spill of the
; scalable insert result.
define void @dont_coalesce_arg_v8f16(<8 x half> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl use_v8f16
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> %arg, i64 0)
  call void @use_v8f16(<8 x half> %arg)
  store <vscale x 8 x half> %vec, ptr %ptr
  ret void
}
|  |  | 
; The fixed-width <8 x bfloat> argument crosses the smstop/smstart as a plain
; q-register spill/fill, kept separate from the z-register spill of the
; scalable insert result.
define void @dont_coalesce_arg_v8bf16(<8 x bfloat> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl use_v8bf16
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = call <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> poison, <8 x bfloat> %arg, i64 0)
  call void @use_v8bf16(<8 x bfloat> %arg)
  store <vscale x 8 x bfloat> %vec, ptr %ptr
  ret void
}
|  |  | 
; The fixed-width <4 x float> argument crosses the smstop/smstart as a plain
; q-register spill/fill, kept separate from the z-register spill of the
; scalable insert result.
define void @dont_coalesce_arg_v4f32(<4 x float> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl use_v4f32
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %arg, i64 0)
  call void @use_v4f32(<4 x float> %arg)
  store <vscale x 4 x float> %vec, ptr %ptr
  ret void
}
|  |  | 
; The fixed-width <2 x double> argument crosses the smstop/smstart as a plain
; q-register spill/fill, kept separate from the z-register spill of the
; scalable insert result.
define void @dont_coalesce_arg_v2f64(<2 x double> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    bl use_v2f64
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %arg, i64 0)
  call void @use_v2f64(<2 x double> %arg)
  store <vscale x 2 x double> %vec, ptr %ptr
  ret void
}
|  |  | 
|  | ; | 
|  | ; <8 x i1> type will need type promotion. | 
|  | ; | 
; The promoted <8 x i1> argument crosses the smstop/smstart as a d-register
; spill/fill (str/ldr d0, [sp, #8]); the predicate produced for the scalable
; insert is spilled separately as p0.
define void @dont_coalesce_arg_v8i1(<8 x i1> %arg, ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_arg_v8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    mov z1.d, z0.d
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    and z1.b, z1.b, #0x1
; CHECK-NEXT:    cmpne p0.b, p0/z, z1.b, #0
; CHECK-NEXT:    str p0, [x8, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    bl use_v8i1
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    add x8, sp, #16
; CHECK-NEXT:    ldr p0, [x8, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    str p0, [x19]
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %vec = call <vscale x 16 x i1> @llvm.vector.insert.nxv8i1.v8i1(<vscale x 16 x i1> poison, <8 x i1> %arg, i64 0)
  call void @use_v8i1(<8 x i1> %arg)
  store <vscale x 16 x i1> %vec, ptr %ptr
  ret void
}
|  |  | 
|  | ; | 
|  | ; Scalar return values | 
|  | ; | 
|  |  | 
; The i8 result is returned in w0 and only moved into a vector register
; (fmov s0, w0) after smstart, so no vector spill crosses the
; streaming-mode change.
define void @dont_coalesce_res_i8(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_i8
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    st1b { z0.b }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call i8 @get_i8()
  %vec = insertelement <vscale x 16 x i8> poison, i8 %res, i32 0
  store <vscale x 16 x i8> %vec, ptr %ptr
  ret void
}
|  |  | 
; The i16 result is returned in w0 and only moved into a vector register
; (fmov s0, w0) after smstart, so no vector spill crosses the
; streaming-mode change.
define void @dont_coalesce_res_i16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_i16
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call i16 @get_i16()
  %vec = insertelement <vscale x 8 x i16> poison, i16 %res, i32 0
  store <vscale x 8 x i16> %vec, ptr %ptr
  ret void
}
|  |  | 
; The i32 result is returned in w0 and only moved into a vector register
; (fmov s0, w0) after smstart, so no vector spill crosses the
; streaming-mode change.
define void @dont_coalesce_res_i32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_i32
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call i32 @get_i32()
  %vec = insertelement <vscale x 4 x i32> poison, i32 %res, i32 0
  store <vscale x 4 x i32> %vec, ptr %ptr
  ret void
}
|  |  | 
; The i64 result is returned in x0 and only moved into a vector register
; (fmov d0, x0) after smstart, so no vector spill crosses the
; streaming-mode change.
define void @dont_coalesce_res_i64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_i64
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %res = call i64 @get_i64()
  %vec = insertelement <vscale x 2 x i64> poison, i64 %res, i32 0
  store <vscale x 2 x i64> %vec, ptr %ptr
  ret void
}
|  |  | 
; The half result in h0 crosses the smstart as a 2-byte scalar spill/fill
; (str/ldr h0), not as a scalable z-register spill.
define void @dont_coalesce_res_f16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_f16
; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
  %res = call half @get_f16()
  %vec = insertelement <vscale x 8 x half> poison, half %res, i32 0
  store <vscale x 8 x half> %vec, ptr %ptr
  ret void
}
|  |  | 
; The float result in s0 crosses the smstart as a 4-byte scalar spill/fill
; (str/ldr s0), not as a scalable z-register spill.
define void @dont_coalesce_res_f32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_f32
; CHECK-NEXT:    str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
  %res = call float @get_f32()
  %vec = insertelement <vscale x 4 x float> poison, float %res, i32 0
  store <vscale x 4 x float> %vec, ptr %ptr
  ret void
}
|  |  | 
; The double result in d0 crosses the smstart as an 8-byte scalar spill/fill
; (str/ldr d0), not as a scalable z-register spill.
define void @dont_coalesce_res_f64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_f64
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
  %res = call double @get_f64()
  %vec = insertelement <vscale x 2 x double> poison, double %res, i32 0
  store <vscale x 2 x double> %vec, ptr %ptr
  ret void
}
|  |  | 
|  | ; | 
|  | ; Single-element vector result values | 
|  | ; | 
|  |  | 
; The <1 x i8> result in d0 crosses the smstart as an 8-byte d-register
; spill/fill (str/ldr d0), not as a scalable z-register spill.
define void @dont_coalesce_res_v1i8(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v1i8
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1b { z0.b }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
  %res = call <1 x i8> @get_v1i8()
  %elt = extractelement <1 x i8> %res, i32 0
  %vec = insertelement <vscale x 16 x i8> poison, i8 %elt, i32 0
  store <vscale x 16 x i8> %vec, ptr %ptr
  ret void
}
|  |  | 
; The <1 x i16> result in d0 crosses the smstart as an 8-byte d-register
; spill/fill (str/ldr d0), not as a scalable z-register spill.
define void @dont_coalesce_res_v1i16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v1i16
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
  %res = call <1 x i16> @get_v1i16()
  %elt = extractelement <1 x i16> %res, i32 0
  %vec = insertelement <vscale x 8 x i16> poison, i16 %elt, i32 0
  store <vscale x 8 x i16> %vec, ptr %ptr
  ret void
}
|  |  | 
; Result of @get_v1i32 is returned in d0. It must be spilled/reloaded around
; the smstart with plain scalar str/ldr (no 'mul vl' addressing), i.e. not
; coalesced into an SVE Z register across the streaming-mode change.
define void @dont_coalesce_res_v1i32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v1i32
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <1 x i32> @get_v1i32()
%elt = extractelement <1 x i32> %res, i32 0
%vec = insertelement <vscale x 4 x i32> poison, i32 %elt, i32 0
store <vscale x 4 x i32> %vec, ptr %ptr
ret void
}
|  |  | 
; Result of @get_v1i64 is returned in d0. It must be spilled/reloaded around
; the smstart with plain scalar str/ldr (no 'mul vl' addressing), i.e. not
; coalesced into an SVE Z register across the streaming-mode change.
define void @dont_coalesce_res_v1i64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v1i64
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <1 x i64> @get_v1i64()
%elt = extractelement <1 x i64> %res, i32 0
%vec = insertelement <vscale x 2 x i64> poison, i64 %elt, i32 0
store <vscale x 2 x i64> %vec, ptr %ptr
ret void
}
|  |  | 
; Result of @get_v1f16 is returned in h0. It must be spilled/reloaded around
; the smstart with plain scalar str/ldr (no 'mul vl' addressing); only the
; implicit-def "kill" into z0 appears, not a Z-register spill.
define void @dont_coalesce_res_v1f16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v1f16
; CHECK-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr h0, [sp, #14] // 2-byte Folded Reload
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <1 x half> @get_v1f16()
%elt = extractelement <1 x half> %res, i32 0
%vec = insertelement <vscale x 8 x half> poison, half %elt, i32 0
store <vscale x 8 x half> %vec, ptr %ptr
ret void
}
|  |  | 
; Result of @get_v1f32 is returned in d0. It must be spilled/reloaded around
; the smstart with plain scalar str/ldr (no 'mul vl' addressing), i.e. not
; coalesced into an SVE Z register across the streaming-mode change.
define void @dont_coalesce_res_v1f32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v1f32
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <1 x float> @get_v1f32()
%elt = extractelement <1 x float> %res, i32 0
%vec = insertelement <vscale x 4 x float> poison, float %elt, i32 0
store <vscale x 4 x float> %vec, ptr %ptr
ret void
}
|  |  | 
; Result of @get_v1f64 is returned in d0. It must be spilled/reloaded around
; the smstart with plain scalar str/ldr (no 'mul vl' addressing), i.e. not
; coalesced into an SVE Z register across the streaming-mode change.
define void @dont_coalesce_res_v1f64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v1f64
; CHECK-NEXT:    str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <1 x double> @get_v1f64()
%elt = extractelement <1 x double> %res, i32 0
%vec = insertelement <vscale x 2 x double> poison, double %elt, i32 0
store <vscale x 2 x double> %vec, ptr %ptr
ret void
}
|  |  | 
|  | ; | 
|  | ; Full vector result values | 
|  | ; | 
|  |  | 
; Full-vector result of @get_v16i8 is returned in q0. It must be spilled and
; reloaded around the smstart with a plain str/ldr q0 (no 'mul vl'
; addressing); only the implicit-def "kill" widens it to z0 for the store.
define void @dont_coalesce_res_v16i8(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v16i8
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1b { z0.b }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <16 x i8> @get_v16i8()
%vec = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> %res, i64 0)
store <vscale x 16 x i8> %vec, ptr %ptr
ret void
}
|  |  | 
; Full-vector result of @get_v8i16 is returned in q0. It must be spilled and
; reloaded around the smstart with a plain str/ldr q0 (no 'mul vl'
; addressing); only the implicit-def "kill" widens it to z0 for the store.
define void @dont_coalesce_res_v8i16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v8i16
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <8 x i16> @get_v8i16()
%vec = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> %res, i64 0)
store <vscale x 8 x i16> %vec, ptr %ptr
ret void
}
|  |  | 
; Full-vector result of @get_v4i32 is returned in q0. It must be spilled and
; reloaded around the smstart with a plain str/ldr q0 (no 'mul vl'
; addressing); only the implicit-def "kill" widens it to z0 for the store.
define void @dont_coalesce_res_v4i32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v4i32
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <4 x i32> @get_v4i32()
%vec = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> %res, i64 0)
store <vscale x 4 x i32> %vec, ptr %ptr
ret void
}
|  |  | 
; Full-vector result of @get_v2i64 is returned in q0. It must be spilled and
; reloaded around the smstart with a plain str/ldr q0 (no 'mul vl'
; addressing); only the implicit-def "kill" widens it to z0 for the store.
define void @dont_coalesce_res_v2i64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v2i64
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <2 x i64> @get_v2i64()
%vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> %res, i64 0)
store <vscale x 2 x i64> %vec, ptr %ptr
ret void
}
|  |  | 
; Full-vector result of @get_v8f16 is returned in q0. It must be spilled and
; reloaded around the smstart with a plain str/ldr q0 (no 'mul vl'
; addressing); only the implicit-def "kill" widens it to z0 for the store.
define void @dont_coalesce_res_v8f16(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v8f16
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1h { z0.h }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <8 x half> @get_v8f16()
%vec = call <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> poison, <8 x half> %res, i64 0)
store <vscale x 8 x half> %vec, ptr %ptr
ret void
}
|  |  | 
; Full-vector result of @get_v4f32 is returned in q0. It must be spilled and
; reloaded around the smstart with a plain str/ldr q0 (no 'mul vl'
; addressing); only the implicit-def "kill" widens it to z0 for the store.
define void @dont_coalesce_res_v4f32(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v4f32
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1w { z0.s }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <4 x float> @get_v4f32()
%vec = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> %res, i64 0)
store <vscale x 4 x float> %vec, ptr %ptr
ret void
}
|  |  | 
; Full-vector result of @get_v2f64 is returned in q0. It must be spilled and
; reloaded around the smstart with a plain str/ldr q0 (no 'mul vl'
; addressing); only the implicit-def "kill" widens it to z0 for the store.
define void @dont_coalesce_res_v2f64(ptr %ptr) #0 {
; CHECK-LABEL: dont_coalesce_res_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-NEXT:    mov x19, x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl get_v2f64
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    st1d { z0.d }, p0, [x19]
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
%res = call <2 x double> @get_v2f64()
%vec = call <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> poison, <2 x double> %res, i64 0)
store <vscale x 2 x double> %vec, ptr %ptr
ret void
}
|  |  | 
; External (non-streaming) callees returning floating-point scalar/vector
; results, used by the dont_coalesce_* result tests.
declare half @get_f16()
declare float @get_f32()
declare double @get_f64()
declare <1 x half> @get_v1f16()
declare <1 x float> @get_v1f32()
declare <1 x double> @get_v1f64()
declare <8 x half> @get_v8f16()
declare <4 x float> @get_v4f32()
declare <2 x double> @get_v2f64()
|  |  | 
; External (non-streaming) callees returning integer scalar/vector results,
; used by the dont_coalesce_* result tests.
declare i8 @get_i8()
declare i16 @get_i16()
declare i32 @get_i32()
declare i64 @get_i64()
declare <1 x i8> @get_v1i8()
declare <1 x i16> @get_v1i16()
declare <1 x i32> @get_v1i32()
; Return type fixed from <2 x i64> to <1 x i64> to match the call site in
; @dont_coalesce_res_v1i64 (which calls it as `call <1 x i64> @get_v1i64()`).
declare <1 x i64> @get_v1i64()
declare <16 x i8> @get_v16i8()
declare <8 x i16> @get_v8i16()
declare <4 x i32> @get_v4i32()
declare <2 x i64> @get_v2i64()
|  |  | 
; External (non-streaming) callees taking floating-point scalar/vector
; arguments, used by the dont_coalesce_arg_* tests.
declare void @use_f16(half)
declare void @use_f32(float)
declare void @use_f64(double)
declare void @use_v1f16(<1 x half>)
declare void @use_v1f32(<1 x float>)
declare void @use_v1f64(<1 x double>)
declare void @use_v8f16(<8 x half>)
declare void @use_v8bf16(<8 x bfloat>)
declare void @use_v4f32(<4 x float>)
declare void @use_v2f64(<2 x double>)
|  |  | 
; External (non-streaming) callees taking integer scalar/vector arguments,
; used by the dont_coalesce_arg_* tests.
declare void @use_i8(i8)
declare void @use_i16(i16)
declare void @use_i32(i32)
declare void @use_i64(i64)
declare void @use_v1i8(<1 x i8>)
declare void @use_v1i16(<1 x i16>)
declare void @use_v1i32(<1 x i32>)
declare void @use_v1i64(<1 x i64>)
declare void @use_v16i8(<16 x i8>)
declare void @use_v8i16(<8 x i16>)
declare void @use_v4i32(<4 x i32>)
declare void @use_v2i64(<2 x i64>)
declare void @use_v8i1(<8 x i1>)
|  |  | 
; llvm.vector.insert intrinsics used to place fixed-width call results into
; scalable vectors. Intrinsic names are mangled from their overloaded types;
; the first declaration's name is corrected from .nxv8i1.v8i1 to
; .nxv16i1.v8i1 to match its <vscale x 16 x i1> return/operand types.
declare <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v8i1(<vscale x 16 x i1>, <8 x i1>, i64)
declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
|  |  | 
|  | attributes #0 = { nounwind "aarch64_pstate_sm_enabled" "target-features"="+sve,+sme"  } |