; llvm/test/CodeGen/AArch64/aarch64-no-mov-spill-chain.ll - llvm-project - Git at Google

 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
 ; RUN: llc -mtriple=aarch64-linux-gnu -o - -O3 %s | FileCheck %s

 ; During times of high register pressure, Greedy register allocation
 ; may emit large mov spill chains on AArch64. The Spill Copy Elimination
 ; pass can simplify these chains and improve runtime performance. For situations
 ; where this is likely, we need to ensure it is simplifying the register allocation.

 ; @_test takes sixteen read-only pointer arguments and allocates fifteen
 ; [5 x [5 x [5 x float]]] stack arrays (%17..%31). A three-deep loop nest
 ; loads one float through each argument and through six of the stack arrays,
 ; then stores five sums of six products into %21, %18, %20, %19 and %17.
 ; The outermost loop never exits: block 154 branches back unconditionally.
 ; The assertions below are autogenerated (see the NOTE at the top of the
 ; file); regenerate them with utils/update_llc_test_checks.py instead of
 ; editing them by hand.
 define void @_test(ptr readonly nocapture %0, ptr readonly nocapture %1, ptr readonly nocapture %2, ptr readonly nocapture %3, ptr readonly nocapture %4, ptr readonly nocapture %5, ptr readonly nocapture %6, ptr readonly nocapture %7, ptr readonly nocapture %8, ptr readonly nocapture %9, ptr readonly nocapture %10, ptr readonly nocapture %11, ptr readonly nocapture %12, ptr readonly nocapture %13, ptr readonly nocapture %14, ptr readonly nocapture %15) #0 {
 ; CHECK-LABEL: _test:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x30, [sp, #-96]! // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
 ; CHECK-NEXT:    sub sp, sp, #1, lsl #12 // =4096
 ; CHECK-NEXT:    sub sp, sp, #3648
 ; CHECK-NEXT:    ldr x9, [sp, #7896]
 ; CHECK-NEXT:    ldr x10, [sp, #7888]
 ; CHECK-NEXT:    add x17, sp, #1, lsl #12 // =4096
 ; CHECK-NEXT:    ldr x11, [sp, #7880]
 ; CHECK-NEXT:    ldr x12, [sp, #7872]
 ; CHECK-NEXT:    add x18, sp, #1, lsl #12 // =4096
 ; CHECK-NEXT:    ldr x13, [sp, #7864]
 ; CHECK-NEXT:    ldr x14, [sp, #7856]
 ; CHECK-NEXT:    add x19, sp, #1, lsl #12 // =4096
 ; CHECK-NEXT:    ldr x15, [sp, #7848]
 ; CHECK-NEXT:    ldr x16, [sp, #7840]
 ; CHECK-NEXT:    add x20, sp, #1, lsl #12 // =4096
 ; CHECK-NEXT:    add x21, sp, #1, lsl #12 // =4096
 ; CHECK-NEXT:    add x22, sp, #1, lsl #12 // =4096
 ; CHECK-NEXT:    mov x8, xzr
 ; CHECK-NEXT:    add x17, x17, #3148
 ; CHECK-NEXT:    add x18, x18, #2148
 ; CHECK-NEXT:    add x19, x19, #1648
 ; CHECK-NEXT:    add x20, x20, #2648
 ; CHECK-NEXT:    add x21, x21, #1148
 ; CHECK-NEXT:    add x22, x22, #148
 ; CHECK-NEXT:    add x23, sp, #3744
 ; CHECK-NEXT:    add x24, sp, #3244
 ; CHECK-NEXT:    add x25, sp, #2244
 ; CHECK-NEXT:    add x26, sp, #1744
 ; CHECK-NEXT:    add x27, sp, #244
 ; CHECK-NEXT:    mov w28, #1 // =0x1
 ; CHECK-NEXT:  .LBB0_1: // %.preheader167
 ; CHECK-NEXT:    // =>This Loop Header: Depth=1
 ; CHECK-NEXT:    // Child Loop BB0_2 Depth 2
 ; CHECK-NEXT:    // Child Loop BB0_3 Depth 3
 ; CHECK-NEXT:    stp x28, x27, [sp, #8] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov x30, x0
 ; CHECK-NEXT:    mov x28, x18
 ; CHECK-NEXT:    stp x0, x1, [sp, #168] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov x0, x9
 ; CHECK-NEXT:    stp x18, x17, [sp, #88] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov x18, x17
 ; CHECK-NEXT:    mov w17, #1 // =0x1
 ; CHECK-NEXT:    stp x2, x3, [sp, #184] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x4, x5, [sp, #200] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x6, x7, [sp, #216] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x16, x15, [sp, #104] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x14, x13, [sp, #120] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x12, x11, [sp, #136] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x10, x9, [sp, #152] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x26, x25, [sp, #24] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x24, x23, [sp, #40] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x22, x21, [sp, #56] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x20, x19, [sp, #72] // 16-byte Folded Spill
 ; CHECK-NEXT:    str x17, [sp, #232] // 8-byte Spill
 ; CHECK-NEXT:  .LBB0_2: // %.preheader166
 ; CHECK-NEXT:    // Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    // => This Loop Header: Depth=2
 ; CHECK-NEXT:    // Child Loop BB0_3 Depth 3
 ; CHECK-NEXT:    mov x9, #-500 // =0xfffffffffffffe0c
 ; CHECK-NEXT:    mov x17, x28
 ; CHECK-NEXT:    str x20, [sp, #7752] // 8-byte Spill
 ; CHECK-NEXT:  .LBB0_3: // Parent Loop BB0_1 Depth=1
 ; CHECK-NEXT:    // Parent Loop BB0_2 Depth=2
 ; CHECK-NEXT:    // => This Inner Loop Header: Depth=3
 ; CHECK-NEXT:    add x28, x26, x9
 ; CHECK-NEXT:    ldr s1, [x5, x9]
 ; CHECK-NEXT:    ldr s3, [x14, x9]
 ; CHECK-NEXT:    ldr s2, [x28, #500]
 ; CHECK-NEXT:    ldr s0, [x8]
 ; CHECK-NEXT:    add x28, x27, x9
 ; CHECK-NEXT:    ldr s6, [x30, x9]
 ; CHECK-NEXT:    ldr s7, [x28, #500]
 ; CHECK-NEXT:    add x28, x25, x9
 ; CHECK-NEXT:    fmul s4, s1, s2
 ; CHECK-NEXT:    fmul s5, s3, s2
 ; CHECK-NEXT:    ldr s16, [x1, x9]
 ; CHECK-NEXT:    fmul s2, s0, s2
 ; CHECK-NEXT:    ldr s17, [x3, x9]
 ; CHECK-NEXT:    ldr s18, [x28, #500]
 ; CHECK-NEXT:    add x28, x24, x9
 ; CHECK-NEXT:    ldr s19, [x6, x9]
 ; CHECK-NEXT:    mov x20, x21
 ; CHECK-NEXT:    ldr s21, [x28, #500]
 ; CHECK-NEXT:    add x28, x23, x9
 ; CHECK-NEXT:    fmadd s4, s6, s7, s4
 ; CHECK-NEXT:    ldr s6, [x4, x9]
 ; CHECK-NEXT:    fmadd s5, s16, s7, s5
 ; CHECK-NEXT:    fmadd s1, s1, s7, s2
 ; CHECK-NEXT:    fmadd s20, s6, s7, s2
 ; CHECK-NEXT:    fmadd s2, s17, s7, s2
 ; CHECK-NEXT:    ldr s7, [x15, x9]
 ; CHECK-NEXT:    fmadd s4, s16, s18, s4
 ; CHECK-NEXT:    fmadd s5, s19, s18, s5
 ; CHECK-NEXT:    ldr s16, [x16, x9]
 ; CHECK-NEXT:    fmul s19, s0, s21
 ; CHECK-NEXT:    fmadd s1, s3, s18, s1
 ; CHECK-NEXT:    fmadd s3, s7, s18, s20
 ; CHECK-NEXT:    fmadd s2, s16, s18, s2
 ; CHECK-NEXT:    fmadd s4, s6, s21, s4
 ; CHECK-NEXT:    fmadd s5, s7, s21, s5
 ; CHECK-NEXT:    ldr s6, [x0, x9]
 ; CHECK-NEXT:    fadd s1, s1, s19
 ; CHECK-NEXT:    ldr s7, [x28, #500]
 ; CHECK-NEXT:    add x28, x22, x9
 ; CHECK-NEXT:    fadd s3, s3, s19
 ; CHECK-NEXT:    fmadd s2, s6, s21, s2
 ; CHECK-NEXT:    ldr s18, [x28, #500]
 ; CHECK-NEXT:    add x28, x21, x9
 ; CHECK-NEXT:    mov x21, x8
 ; CHECK-NEXT:    ldr x8, [sp, #7752] // 8-byte Reload
 ; CHECK-NEXT:    fmadd s4, s17, s7, s4
 ; CHECK-NEXT:    fmadd s5, s16, s7, s5
 ; CHECK-NEXT:    ldr s16, [x10, x9]
 ; CHECK-NEXT:    fmadd s0, s0, s7, s1
 ; CHECK-NEXT:    ldr s17, [x2, x9]
 ; CHECK-NEXT:    ldr s1, [x7, x9]
 ; CHECK-NEXT:    fmadd s3, s6, s7, s3
 ; CHECK-NEXT:    ldr s6, [x11, x9]
 ; CHECK-NEXT:    fmadd s2, s16, s7, s2
 ; CHECK-NEXT:    ldr s7, [x12, x9]
 ; CHECK-NEXT:    add x8, x8, x9
 ; CHECK-NEXT:    fmadd s4, s17, s18, s4
 ; CHECK-NEXT:    fmadd s1, s1, s18, s5
 ; CHECK-NEXT:    ldr s5, [x13, x9]
 ; CHECK-NEXT:    fmadd s0, s6, s18, s0
 ; CHECK-NEXT:    fmadd s3, s7, s18, s3
 ; CHECK-NEXT:    fmadd s2, s5, s18, s2
 ; CHECK-NEXT:    str s4, [x28, #500]
 ; CHECK-NEXT:    add x28, x19, x9
 ; CHECK-NEXT:    str s1, [x8, #500]
 ; CHECK-NEXT:    add x8, x17, x9
 ; CHECK-NEXT:    str s0, [x28, #500]
 ; CHECK-NEXT:    add x28, x18, x9
 ; CHECK-NEXT:    add x9, x9, #4
 ; CHECK-NEXT:    str s3, [x8, #500]
 ; CHECK-NEXT:    mov x8, x21
 ; CHECK-NEXT:    mov x21, x20
 ; CHECK-NEXT:    cmn x9, #480
 ; CHECK-NEXT:    str s2, [x28, #500]
 ; CHECK-NEXT:    b.ne .LBB0_3
 ; CHECK-NEXT:  // %bb.4: // in Loop: Header=BB0_2 Depth=2
 ; CHECK-NEXT:    ldr x9, [sp, #232] // 8-byte Reload
 ; CHECK-NEXT:    ldr x20, [sp, #7752] // 8-byte Reload
 ; CHECK-NEXT:    add x18, x18, #20
 ; CHECK-NEXT:    add x28, x17, #20
 ; CHECK-NEXT:    add x19, x19, #20
 ; CHECK-NEXT:    add x21, x21, #20
 ; CHECK-NEXT:    add x9, x9, #1
 ; CHECK-NEXT:    add x20, x20, #20
 ; CHECK-NEXT:    add x22, x22, #20
 ; CHECK-NEXT:    add x23, x23, #20
 ; CHECK-NEXT:    add x24, x24, #20
 ; CHECK-NEXT:    add x25, x25, #20
 ; CHECK-NEXT:    add x26, x26, #20
 ; CHECK-NEXT:    add x27, x27, #20
 ; CHECK-NEXT:    add x0, x0, #20
 ; CHECK-NEXT:    add x10, x10, #20
 ; CHECK-NEXT:    add x11, x11, #20
 ; CHECK-NEXT:    add x12, x12, #20
 ; CHECK-NEXT:    add x13, x13, #20
 ; CHECK-NEXT:    add x14, x14, #20
 ; CHECK-NEXT:    add x15, x15, #20
 ; CHECK-NEXT:    add x16, x16, #20
 ; CHECK-NEXT:    add x7, x7, #20
 ; CHECK-NEXT:    add x6, x6, #20
 ; CHECK-NEXT:    add x5, x5, #20
 ; CHECK-NEXT:    add x4, x4, #20
 ; CHECK-NEXT:    add x3, x3, #20
 ; CHECK-NEXT:    cmp x9, #6
 ; CHECK-NEXT:    add x2, x2, #20
 ; CHECK-NEXT:    add x1, x1, #20
 ; CHECK-NEXT:    add x30, x30, #20
 ; CHECK-NEXT:    str x9, [sp, #232] // 8-byte Spill
 ; CHECK-NEXT:    b.ne .LBB0_2
 ; CHECK-NEXT:  // %bb.5: // in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT:    ldp x18, x17, [sp, #88] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x28, x27, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT:    ldp x20, x19, [sp, #72] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x28, x28, #1
 ; CHECK-NEXT:    ldp x22, x21, [sp, #56] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x17, x17, #100
 ; CHECK-NEXT:    add x18, x18, #100
 ; CHECK-NEXT:    add x27, x27, #100
 ; CHECK-NEXT:    ldp x24, x23, [sp, #40] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x19, x19, #100
 ; CHECK-NEXT:    ldp x26, x25, [sp, #24] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x20, x20, #100
 ; CHECK-NEXT:    ldp x10, x9, [sp, #152] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x21, x21, #100
 ; CHECK-NEXT:    ldp x12, x11, [sp, #136] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x22, x22, #100
 ; CHECK-NEXT:    ldp x14, x13, [sp, #120] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x23, x23, #100
 ; CHECK-NEXT:    ldp x16, x15, [sp, #104] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x24, x24, #100
 ; CHECK-NEXT:    ldp x6, x7, [sp, #216] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x25, x25, #100
 ; CHECK-NEXT:    ldp x4, x5, [sp, #200] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x26, x26, #100
 ; CHECK-NEXT:    ldp x2, x3, [sp, #184] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x9, x9, #100
 ; CHECK-NEXT:    ldp x0, x1, [sp, #168] // 16-byte Folded Reload
 ; CHECK-NEXT:    add x10, x10, #100
 ; CHECK-NEXT:    add x11, x11, #100
 ; CHECK-NEXT:    add x12, x12, #100
 ; CHECK-NEXT:    add x13, x13, #100
 ; CHECK-NEXT:    add x14, x14, #100
 ; CHECK-NEXT:    add x15, x15, #100
 ; CHECK-NEXT:    add x16, x16, #100
 ; CHECK-NEXT:    add x7, x7, #100
 ; CHECK-NEXT:    add x6, x6, #100
 ; CHECK-NEXT:    add x5, x5, #100
 ; CHECK-NEXT:    add x4, x4, #100
 ; CHECK-NEXT:    add x3, x3, #100
 ; CHECK-NEXT:    add x2, x2, #100
 ; CHECK-NEXT:    add x1, x1, #100
 ; CHECK-NEXT:    add x0, x0, #100
 ; CHECK-NEXT:    b .LBB0_1
   ; Fifteen 5x5x5 float arrays on the stack. In the loop body %31, %28, %27,
   ; %25, %24 and %23 are read and %21, %18, %20, %19 and %17 are written;
   ; %22, %26, %29 and %30 are never referenced.
   %17 = alloca [5 x [5 x [5 x float]]], align 4
   %18 = alloca [5 x [5 x [5 x float]]], align 4
   %19 = alloca [5 x [5 x [5 x float]]], align 4
   %20 = alloca [5 x [5 x [5 x float]]], align 4
   %21 = alloca [5 x [5 x [5 x float]]], align 4
   %22 = alloca [5 x [5 x [5 x float]]], align 4
   %23 = alloca [5 x [5 x [5 x float]]], align 4
   %24 = alloca [5 x [5 x [5 x float]]], align 4
   %25 = alloca [5 x [5 x [5 x float]]], align 4
   %26 = alloca [5 x [5 x [5 x float]]], align 4
   %27 = alloca [5 x [5 x [5 x float]]], align 4
   %28 = alloca [5 x [5 x [5 x float]]], align 4
   %29 = alloca [5 x [5 x [5 x float]]], align 4
   %30 = alloca [5 x [5 x [5 x float]]], align 4
   %31 = alloca [5 x [5 x [5 x float]]], align 4
   ; %32 is the constant 0 sign-extended to i64, so %34 is 0 and %35 is -125.
   ; %.idx and %33 have no users in this function.
   %32 = sext i32 0 to i64
   %.idx = mul nsw i64 %32, 4500
   %33 = getelementptr i8, ptr null, i64 -4500
   %34 = mul nsw i64 %32, 125
   %35 = add nsw i64 %34, -125
   br label %.preheader167

 .preheader167:                                    ; preds = %154, %16
   ; Outer loop header: %indvars.iv175 starts at 1 and is incremented in
   ; block 154. %37 = 25 * %indvars.iv175 - 31.
   %indvars.iv175 = phi i64 [ 1, %16 ], [ %indvars.iv.next176, %154 ]
   %36 = mul nuw nsw i64 %indvars.iv175, 25
   %37 = add nsw i64 %36, -31
   br label %.preheader166

 .preheader166:                                    ; preds = %153, %.preheader167
   ; Middle loop header: %indvars.iv172 starts at 1; block 153 leaves this
   ; loop once the incremented value equals 6. %39 = %37 + 5 * %indvars.iv172.
   %indvars.iv172 = phi i64 [ 1, %.preheader167 ], [ %indvars.iv.next173, %153 ]
   %38 = mul nuw nsw i64 %indvars.iv172, 5
   %39 = add nsw i64 %37, %38
   br label %40

 40:                                               ; preds = %40, %.preheader166
   ; Inner loop body: %indvars.iv counts up from 1 while %41 counts down from
   ; 5; the loop ends when the decremented %41 reaches 0.
   ; %42 indexes the stack arrays; %43 = %35 + %42 indexes the arguments.
   %indvars.iv = phi i64 [ 1, %.preheader166 ], [ %indvars.iv.next, %40 ]
   %41 = phi i64 [ 5, %.preheader166 ], [ %152, %40 ]
   %42 = add nsw i64 %39, %indvars.iv
   %43 = add nsw i64 %35, %42
   ; One float load through each of the sixteen pointer arguments.
   %44 = getelementptr float, ptr %0, i64 %43
   %45 = load float, ptr %44, align 4
   %46 = getelementptr float, ptr %1, i64 %43
   %47 = load float, ptr %46, align 4
   %48 = getelementptr float, ptr %2, i64 %43
   %49 = load float, ptr %48, align 4
   %50 = getelementptr float, ptr %3, i64 %43
   %51 = load float, ptr %50, align 4
   %52 = getelementptr float, ptr %4, i64 %43
   %53 = load float, ptr %52, align 4
   %54 = getelementptr float, ptr %5, i64 %43
   %55 = load float, ptr %54, align 4
   %56 = getelementptr float, ptr %6, i64 %43
   %57 = load float, ptr %56, align 4
   %58 = getelementptr float, ptr %7, i64 %43
   %59 = load float, ptr %58, align 4
   %60 = getelementptr float, ptr %8, i64 %43
   %61 = load float, ptr %60, align 4
   %62 = getelementptr float, ptr %9, i64 %43
   %63 = load float, ptr %62, align 4
   %64 = getelementptr float, ptr %10, i64 %43
   %65 = load float, ptr %64, align 4
   %66 = getelementptr float, ptr %11, i64 %43
   %67 = load float, ptr %66, align 4
   %68 = getelementptr float, ptr %12, i64 %43
   %69 = load float, ptr %68, align 4
   %70 = getelementptr float, ptr %13, i64 %43
   %71 = load float, ptr %70, align 4
   %72 = getelementptr float, ptr %14, i64 %43
   %73 = load float, ptr %72, align 4
   %74 = getelementptr float, ptr %15, i64 %43
   %75 = load float, ptr %74, align 4
   ; Four loads from the null pointer.
   %76 = load float, ptr null, align 4
   %77 = load float, ptr null, align 4
   %78 = load float, ptr null, align 4
   %79 = load float, ptr null, align 4
   ; Result 1 of 5: sum of six products, stored into %21. The stack-array
   ; values %81, %84, %88, %92, %96 and %100 loaded here are reused as the
   ; second factor of every product in the remaining four results.
   %80 = getelementptr float, ptr %31, i64 %42
   %81 = load float, ptr %80, align 4
   %82 = fmul contract float %45, %81
   %83 = getelementptr float, ptr %28, i64 %42
   %84 = load float, ptr %83, align 4
   %85 = fmul contract float %55, %84
   %86 = fadd contract float %82, %85
   %87 = getelementptr float, ptr %27, i64 %42
   %88 = load float, ptr %87, align 4
   %89 = fmul contract float %47, %88
   %90 = fadd contract float %86, %89
   %91 = getelementptr float, ptr %25, i64 %42
   %92 = load float, ptr %91, align 4
   %93 = fmul contract float %53, %92
   %94 = fadd contract float %90, %93
   %95 = getelementptr float, ptr %24, i64 %42
   %96 = load float, ptr %95, align 4
   %97 = fmul contract float %51, %96
   %98 = fadd contract float %94, %97
   %99 = getelementptr float, ptr %23, i64 %42
   %100 = load float, ptr %99, align 4
   %101 = fmul contract float %49, %100
   %102 = fadd contract float %98, %101
   %103 = getelementptr float, ptr %21, i64 %42
   store float %102, ptr %103, align 4
   ; Result 2 of 5: stored into %18.
   %104 = fmul contract float %47, %81
   %105 = fmul contract float %65, %84
   %106 = fadd contract float %104, %105
   %107 = fmul contract float %57, %88
   %108 = fadd contract float %106, %107
   %109 = fmul contract float %63, %92
   %110 = fadd contract float %108, %109
   %111 = fmul contract float %61, %96
   %112 = fadd contract float %110, %111
   %113 = fmul contract float %59, %100
   %114 = fadd contract float %112, %113
   %115 = getelementptr float, ptr %18, i64 %42
   store float %114, ptr %115, align 4
   ; Result 3 of 5: stored into %20.
   %116 = fmul contract float %55, %81
   %117 = fmul contract float %79, %84
   %118 = fadd contract float %116, %117
   %119 = fmul contract float %65, %88
   %120 = fadd contract float %118, %119
   %121 = fmul contract float %78, %92
   %122 = fadd contract float %120, %121
   %123 = fmul contract float %76, %96
   %124 = fadd contract float %122, %123
   %125 = fmul contract float %71, %100
   %126 = fadd contract float %124, %125
   %127 = getelementptr float, ptr %20, i64 %42
   store float %126, ptr %127, align 4
   ; Result 4 of 5: stored into %19.
   %128 = fmul contract float %53, %81
   %129 = fmul contract float %78, %84
   %130 = fadd contract float %128, %129
   %131 = fmul contract float %63, %88
   %132 = fadd contract float %130, %131
   %133 = fmul contract float %77, %92
   %134 = fadd contract float %132, %133
   %135 = fmul contract float %75, %96
   %136 = fadd contract float %134, %135
   %137 = fmul contract float %69, %100
   %138 = fadd contract float %136, %137
   %139 = getelementptr float, ptr %19, i64 %42
   store float %138, ptr %139, align 4
   ; Result 5 of 5: stored into %17.
   %140 = fmul contract float %51, %81
   %141 = fmul contract float %76, %84
   %142 = fadd contract float %140, %141
   %143 = fmul contract float %61, %88
   %144 = fadd contract float %142, %143
   %145 = fmul contract float %75, %92
   %146 = fadd contract float %144, %145
   %147 = fmul contract float %73, %96
   %148 = fadd contract float %146, %147
   %149 = fmul contract float %67, %100
   %150 = fadd contract float %148, %149
   %151 = getelementptr float, ptr %17, i64 %42
   store float %150, ptr %151, align 4
   ; Inner loop latch: advance the index and decrement the trip counter.
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %152 = add nsw i64 %41, -1
   %.not145 = icmp eq i64 %152, 0
   br i1 %.not145, label %153, label %40

 153:                                              ; preds = %40
   ; Middle loop latch.
   %indvars.iv.next173 = add nuw nsw i64 %indvars.iv172, 1
   %exitcond.not = icmp eq i64 %indvars.iv.next173, 6
   br i1 %exitcond.not, label %154, label %.preheader166

 154:                                              ; preds = %153
   ; Outer loop latch: %exitcond178.not is computed but has no user; the
   ; branch back to the outer header is unconditional, so the function has
   ; no return.
   %indvars.iv.next176 = add nuw nsw i64 %indvars.iv175, 1
   %exitcond178.not = icmp eq i64 %indvars.iv.next176, 6
   br label %.preheader167
 }

 attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "frame-pointer"="non-leaf" "target-cpu"="generic" "target-features"="+outline-atomics,+v8a,+fp-armv8,+neon" }
	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
	; RUN: llc -mtriple=aarch64-linux-gnu -o - -O3 %s | FileCheck %s

	; During times of high register pressure, Greedy register allocation
	; may emit large mov spill chains on AArch64. The Spill Copy Elimination
	; pass can simplify these chains and improve runtime performance. For situations
	; where this is likely, we need to ensure it is simplifying the register allocation.

	; @_test takes sixteen read-only pointer arguments and allocates fifteen
	; [5 x [5 x [5 x float]]] stack arrays (%17..%31). A three-deep loop nest
	; loads one float through each argument and through six of the stack arrays,
	; then stores five sums of six products into %21, %18, %20, %19 and %17.
	; The outermost loop never exits: block 154 branches back unconditionally.
	; The assertions below are autogenerated (see the NOTE preceding this
	; function); regenerate them with utils/update_llc_test_checks.py instead
	; of editing them by hand.
	define void @_test(ptr readonly nocapture %0, ptr readonly nocapture %1, ptr readonly nocapture %2, ptr readonly nocapture %3, ptr readonly nocapture %4, ptr readonly nocapture %5, ptr readonly nocapture %6, ptr readonly nocapture %7, ptr readonly nocapture %8, ptr readonly nocapture %9, ptr readonly nocapture %10, ptr readonly nocapture %11, ptr readonly nocapture %12, ptr readonly nocapture %13, ptr readonly nocapture %14, ptr readonly nocapture %15) #0 {
	; CHECK-LABEL: _test:
	; CHECK: // %bb.0:
	; CHECK-NEXT: str x30, [sp, #-96]! // 8-byte Folded Spill
	; CHECK-NEXT: stp x28, x27, [sp, #16] // 16-byte Folded Spill
	; CHECK-NEXT: stp x26, x25, [sp, #32] // 16-byte Folded Spill
	; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
	; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
	; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
	; CHECK-NEXT: sub sp, sp, #1, lsl #12 // =4096
	; CHECK-NEXT: sub sp, sp, #3648
	; CHECK-NEXT: ldr x9, [sp, #7896]
	; CHECK-NEXT: ldr x10, [sp, #7888]
	; CHECK-NEXT: add x17, sp, #1, lsl #12 // =4096
	; CHECK-NEXT: ldr x11, [sp, #7880]
	; CHECK-NEXT: ldr x12, [sp, #7872]
	; CHECK-NEXT: add x18, sp, #1, lsl #12 // =4096
	; CHECK-NEXT: ldr x13, [sp, #7864]
	; CHECK-NEXT: ldr x14, [sp, #7856]
	; CHECK-NEXT: add x19, sp, #1, lsl #12 // =4096
	; CHECK-NEXT: ldr x15, [sp, #7848]
	; CHECK-NEXT: ldr x16, [sp, #7840]
	; CHECK-NEXT: add x20, sp, #1, lsl #12 // =4096
	; CHECK-NEXT: add x21, sp, #1, lsl #12 // =4096
	; CHECK-NEXT: add x22, sp, #1, lsl #12 // =4096
	; CHECK-NEXT: mov x8, xzr
	; CHECK-NEXT: add x17, x17, #3148
	; CHECK-NEXT: add x18, x18, #2148
	; CHECK-NEXT: add x19, x19, #1648
	; CHECK-NEXT: add x20, x20, #2648
	; CHECK-NEXT: add x21, x21, #1148
	; CHECK-NEXT: add x22, x22, #148
	; CHECK-NEXT: add x23, sp, #3744
	; CHECK-NEXT: add x24, sp, #3244
	; CHECK-NEXT: add x25, sp, #2244
	; CHECK-NEXT: add x26, sp, #1744
	; CHECK-NEXT: add x27, sp, #244
	; CHECK-NEXT: mov w28, #1 // =0x1
	; CHECK-NEXT: .LBB0_1: // %.preheader167
	; CHECK-NEXT: // =>This Loop Header: Depth=1
	; CHECK-NEXT: // Child Loop BB0_2 Depth 2
	; CHECK-NEXT: // Child Loop BB0_3 Depth 3
	; CHECK-NEXT: stp x28, x27, [sp, #8] // 16-byte Folded Spill
	; CHECK-NEXT: mov x30, x0
	; CHECK-NEXT: mov x28, x18
	; CHECK-NEXT: stp x0, x1, [sp, #168] // 16-byte Folded Spill
	; CHECK-NEXT: mov x0, x9
	; CHECK-NEXT: stp x18, x17, [sp, #88] // 16-byte Folded Spill
	; CHECK-NEXT: mov x18, x17
	; CHECK-NEXT: mov w17, #1 // =0x1
	; CHECK-NEXT: stp x2, x3, [sp, #184] // 16-byte Folded Spill
	; CHECK-NEXT: stp x4, x5, [sp, #200] // 16-byte Folded Spill
	; CHECK-NEXT: stp x6, x7, [sp, #216] // 16-byte Folded Spill
	; CHECK-NEXT: stp x16, x15, [sp, #104] // 16-byte Folded Spill
	; CHECK-NEXT: stp x14, x13, [sp, #120] // 16-byte Folded Spill
	; CHECK-NEXT: stp x12, x11, [sp, #136] // 16-byte Folded Spill
	; CHECK-NEXT: stp x10, x9, [sp, #152] // 16-byte Folded Spill
	; CHECK-NEXT: stp x26, x25, [sp, #24] // 16-byte Folded Spill
	; CHECK-NEXT: stp x24, x23, [sp, #40] // 16-byte Folded Spill
	; CHECK-NEXT: stp x22, x21, [sp, #56] // 16-byte Folded Spill
	; CHECK-NEXT: stp x20, x19, [sp, #72] // 16-byte Folded Spill
	; CHECK-NEXT: str x17, [sp, #232] // 8-byte Spill
	; CHECK-NEXT: .LBB0_2: // %.preheader166
	; CHECK-NEXT: // Parent Loop BB0_1 Depth=1
	; CHECK-NEXT: // => This Loop Header: Depth=2
	; CHECK-NEXT: // Child Loop BB0_3 Depth 3
	; CHECK-NEXT: mov x9, #-500 // =0xfffffffffffffe0c
	; CHECK-NEXT: mov x17, x28
	; CHECK-NEXT: str x20, [sp, #7752] // 8-byte Spill
	; CHECK-NEXT: .LBB0_3: // Parent Loop BB0_1 Depth=1
	; CHECK-NEXT: // Parent Loop BB0_2 Depth=2
	; CHECK-NEXT: // => This Inner Loop Header: Depth=3
	; CHECK-NEXT: add x28, x26, x9
	; CHECK-NEXT: ldr s1, [x5, x9]
	; CHECK-NEXT: ldr s3, [x14, x9]
	; CHECK-NEXT: ldr s2, [x28, #500]
	; CHECK-NEXT: ldr s0, [x8]
	; CHECK-NEXT: add x28, x27, x9
	; CHECK-NEXT: ldr s6, [x30, x9]
	; CHECK-NEXT: ldr s7, [x28, #500]
	; CHECK-NEXT: add x28, x25, x9
	; CHECK-NEXT: fmul s4, s1, s2
	; CHECK-NEXT: fmul s5, s3, s2
	; CHECK-NEXT: ldr s16, [x1, x9]
	; CHECK-NEXT: fmul s2, s0, s2
	; CHECK-NEXT: ldr s17, [x3, x9]
	; CHECK-NEXT: ldr s18, [x28, #500]
	; CHECK-NEXT: add x28, x24, x9
	; CHECK-NEXT: ldr s19, [x6, x9]
	; CHECK-NEXT: mov x20, x21
	; CHECK-NEXT: ldr s21, [x28, #500]
	; CHECK-NEXT: add x28, x23, x9
	; CHECK-NEXT: fmadd s4, s6, s7, s4
	; CHECK-NEXT: ldr s6, [x4, x9]
	; CHECK-NEXT: fmadd s5, s16, s7, s5
	; CHECK-NEXT: fmadd s1, s1, s7, s2
	; CHECK-NEXT: fmadd s20, s6, s7, s2
	; CHECK-NEXT: fmadd s2, s17, s7, s2
	; CHECK-NEXT: ldr s7, [x15, x9]
	; CHECK-NEXT: fmadd s4, s16, s18, s4
	; CHECK-NEXT: fmadd s5, s19, s18, s5
	; CHECK-NEXT: ldr s16, [x16, x9]
	; CHECK-NEXT: fmul s19, s0, s21
	; CHECK-NEXT: fmadd s1, s3, s18, s1
	; CHECK-NEXT: fmadd s3, s7, s18, s20
	; CHECK-NEXT: fmadd s2, s16, s18, s2
	; CHECK-NEXT: fmadd s4, s6, s21, s4
	; CHECK-NEXT: fmadd s5, s7, s21, s5
	; CHECK-NEXT: ldr s6, [x0, x9]
	; CHECK-NEXT: fadd s1, s1, s19
	; CHECK-NEXT: ldr s7, [x28, #500]
	; CHECK-NEXT: add x28, x22, x9
	; CHECK-NEXT: fadd s3, s3, s19
	; CHECK-NEXT: fmadd s2, s6, s21, s2
	; CHECK-NEXT: ldr s18, [x28, #500]
	; CHECK-NEXT: add x28, x21, x9
	; CHECK-NEXT: mov x21, x8
	; CHECK-NEXT: ldr x8, [sp, #7752] // 8-byte Reload
	; CHECK-NEXT: fmadd s4, s17, s7, s4
	; CHECK-NEXT: fmadd s5, s16, s7, s5
	; CHECK-NEXT: ldr s16, [x10, x9]
	; CHECK-NEXT: fmadd s0, s0, s7, s1
	; CHECK-NEXT: ldr s17, [x2, x9]
	; CHECK-NEXT: ldr s1, [x7, x9]
	; CHECK-NEXT: fmadd s3, s6, s7, s3
	; CHECK-NEXT: ldr s6, [x11, x9]
	; CHECK-NEXT: fmadd s2, s16, s7, s2
	; CHECK-NEXT: ldr s7, [x12, x9]
	; CHECK-NEXT: add x8, x8, x9
	; CHECK-NEXT: fmadd s4, s17, s18, s4
	; CHECK-NEXT: fmadd s1, s1, s18, s5
	; CHECK-NEXT: ldr s5, [x13, x9]
	; CHECK-NEXT: fmadd s0, s6, s18, s0
	; CHECK-NEXT: fmadd s3, s7, s18, s3
	; CHECK-NEXT: fmadd s2, s5, s18, s2
	; CHECK-NEXT: str s4, [x28, #500]
	; CHECK-NEXT: add x28, x19, x9
	; CHECK-NEXT: str s1, [x8, #500]
	; CHECK-NEXT: add x8, x17, x9
	; CHECK-NEXT: str s0, [x28, #500]
	; CHECK-NEXT: add x28, x18, x9
	; CHECK-NEXT: add x9, x9, #4
	; CHECK-NEXT: str s3, [x8, #500]
	; CHECK-NEXT: mov x8, x21
	; CHECK-NEXT: mov x21, x20
	; CHECK-NEXT: cmn x9, #480
	; CHECK-NEXT: str s2, [x28, #500]
	; CHECK-NEXT: b.ne .LBB0_3
	; CHECK-NEXT: // %bb.4: // in Loop: Header=BB0_2 Depth=2
	; CHECK-NEXT: ldr x9, [sp, #232] // 8-byte Reload
	; CHECK-NEXT: ldr x20, [sp, #7752] // 8-byte Reload
	; CHECK-NEXT: add x18, x18, #20
	; CHECK-NEXT: add x28, x17, #20
	; CHECK-NEXT: add x19, x19, #20
	; CHECK-NEXT: add x21, x21, #20
	; CHECK-NEXT: add x9, x9, #1
	; CHECK-NEXT: add x20, x20, #20
	; CHECK-NEXT: add x22, x22, #20
	; CHECK-NEXT: add x23, x23, #20
	; CHECK-NEXT: add x24, x24, #20
	; CHECK-NEXT: add x25, x25, #20
	; CHECK-NEXT: add x26, x26, #20
	; CHECK-NEXT: add x27, x27, #20
	; CHECK-NEXT: add x0, x0, #20
	; CHECK-NEXT: add x10, x10, #20
	; CHECK-NEXT: add x11, x11, #20
	; CHECK-NEXT: add x12, x12, #20
	; CHECK-NEXT: add x13, x13, #20
	; CHECK-NEXT: add x14, x14, #20
	; CHECK-NEXT: add x15, x15, #20
	; CHECK-NEXT: add x16, x16, #20
	; CHECK-NEXT: add x7, x7, #20
	; CHECK-NEXT: add x6, x6, #20
	; CHECK-NEXT: add x5, x5, #20
	; CHECK-NEXT: add x4, x4, #20
	; CHECK-NEXT: add x3, x3, #20
	; CHECK-NEXT: cmp x9, #6
	; CHECK-NEXT: add x2, x2, #20
	; CHECK-NEXT: add x1, x1, #20
	; CHECK-NEXT: add x30, x30, #20
	; CHECK-NEXT: str x9, [sp, #232] // 8-byte Spill
	; CHECK-NEXT: b.ne .LBB0_2
	; CHECK-NEXT: // %bb.5: // in Loop: Header=BB0_1 Depth=1
	; CHECK-NEXT: ldp x18, x17, [sp, #88] // 16-byte Folded Reload
	; CHECK-NEXT: ldp x28, x27, [sp, #8] // 16-byte Folded Reload
	; CHECK-NEXT: ldp x20, x19, [sp, #72] // 16-byte Folded Reload
	; CHECK-NEXT: add x28, x28, #1
	; CHECK-NEXT: ldp x22, x21, [sp, #56] // 16-byte Folded Reload
	; CHECK-NEXT: add x17, x17, #100
	; CHECK-NEXT: add x18, x18, #100
	; CHECK-NEXT: add x27, x27, #100
	; CHECK-NEXT: ldp x24, x23, [sp, #40] // 16-byte Folded Reload
	; CHECK-NEXT: add x19, x19, #100
	; CHECK-NEXT: ldp x26, x25, [sp, #24] // 16-byte Folded Reload
	; CHECK-NEXT: add x20, x20, #100
	; CHECK-NEXT: ldp x10, x9, [sp, #152] // 16-byte Folded Reload
	; CHECK-NEXT: add x21, x21, #100
	; CHECK-NEXT: ldp x12, x11, [sp, #136] // 16-byte Folded Reload
	; CHECK-NEXT: add x22, x22, #100
	; CHECK-NEXT: ldp x14, x13, [sp, #120] // 16-byte Folded Reload
	; CHECK-NEXT: add x23, x23, #100
	; CHECK-NEXT: ldp x16, x15, [sp, #104] // 16-byte Folded Reload
	; CHECK-NEXT: add x24, x24, #100
	; CHECK-NEXT: ldp x6, x7, [sp, #216] // 16-byte Folded Reload
	; CHECK-NEXT: add x25, x25, #100
	; CHECK-NEXT: ldp x4, x5, [sp, #200] // 16-byte Folded Reload
	; CHECK-NEXT: add x26, x26, #100
	; CHECK-NEXT: ldp x2, x3, [sp, #184] // 16-byte Folded Reload
	; CHECK-NEXT: add x9, x9, #100
	; CHECK-NEXT: ldp x0, x1, [sp, #168] // 16-byte Folded Reload
	; CHECK-NEXT: add x10, x10, #100
	; CHECK-NEXT: add x11, x11, #100
	; CHECK-NEXT: add x12, x12, #100
	; CHECK-NEXT: add x13, x13, #100
	; CHECK-NEXT: add x14, x14, #100
	; CHECK-NEXT: add x15, x15, #100
	; CHECK-NEXT: add x16, x16, #100
	; CHECK-NEXT: add x7, x7, #100
	; CHECK-NEXT: add x6, x6, #100
	; CHECK-NEXT: add x5, x5, #100
	; CHECK-NEXT: add x4, x4, #100
	; CHECK-NEXT: add x3, x3, #100
	; CHECK-NEXT: add x2, x2, #100
	; CHECK-NEXT: add x1, x1, #100
	; CHECK-NEXT: add x0, x0, #100
	; CHECK-NEXT: b .LBB0_1
	; Fifteen 5x5x5 float arrays on the stack. In the loop body %31, %28, %27,
	; %25, %24 and %23 are read and %21, %18, %20, %19 and %17 are written;
	; %22, %26, %29 and %30 are never referenced.
	%17 = alloca [5 x [5 x [5 x float]]], align 4
	%18 = alloca [5 x [5 x [5 x float]]], align 4
	%19 = alloca [5 x [5 x [5 x float]]], align 4
	%20 = alloca [5 x [5 x [5 x float]]], align 4
	%21 = alloca [5 x [5 x [5 x float]]], align 4
	%22 = alloca [5 x [5 x [5 x float]]], align 4
	%23 = alloca [5 x [5 x [5 x float]]], align 4
	%24 = alloca [5 x [5 x [5 x float]]], align 4
	%25 = alloca [5 x [5 x [5 x float]]], align 4
	%26 = alloca [5 x [5 x [5 x float]]], align 4
	%27 = alloca [5 x [5 x [5 x float]]], align 4
	%28 = alloca [5 x [5 x [5 x float]]], align 4
	%29 = alloca [5 x [5 x [5 x float]]], align 4
	%30 = alloca [5 x [5 x [5 x float]]], align 4
	%31 = alloca [5 x [5 x [5 x float]]], align 4
	; %32 is the constant 0 sign-extended to i64, so %34 is 0 and %35 is -125.
	; %.idx and %33 have no users in this function.
	%32 = sext i32 0 to i64
	%.idx = mul nsw i64 %32, 4500
	%33 = getelementptr i8, ptr null, i64 -4500
	%34 = mul nsw i64 %32, 125
	%35 = add nsw i64 %34, -125
	br label %.preheader167

	.preheader167: ; preds = %154, %16
	; Outer loop header: %indvars.iv175 starts at 1 and is incremented in
	; block 154. %37 = 25 * %indvars.iv175 - 31.
	%indvars.iv175 = phi i64 [ 1, %16 ], [ %indvars.iv.next176, %154 ]
	%36 = mul nuw nsw i64 %indvars.iv175, 25
	%37 = add nsw i64 %36, -31
	br label %.preheader166

	.preheader166: ; preds = %153, %.preheader167
	; Middle loop header: %indvars.iv172 starts at 1; block 153 leaves this
	; loop once the incremented value equals 6. %39 = %37 + 5 * %indvars.iv172.
	%indvars.iv172 = phi i64 [ 1, %.preheader167 ], [ %indvars.iv.next173, %153 ]
	%38 = mul nuw nsw i64 %indvars.iv172, 5
	%39 = add nsw i64 %37, %38
	br label %40

	40: ; preds = %40, %.preheader166
	; Inner loop body: %indvars.iv counts up from 1 while %41 counts down from
	; 5; the loop ends when the decremented %41 reaches 0.
	; %42 indexes the stack arrays; %43 = %35 + %42 indexes the arguments.
	%indvars.iv = phi i64 [ 1, %.preheader166 ], [ %indvars.iv.next, %40 ]
	%41 = phi i64 [ 5, %.preheader166 ], [ %152, %40 ]
	%42 = add nsw i64 %39, %indvars.iv
	%43 = add nsw i64 %35, %42
	; One float load through each of the sixteen pointer arguments.
	%44 = getelementptr float, ptr %0, i64 %43
	%45 = load float, ptr %44, align 4
	%46 = getelementptr float, ptr %1, i64 %43
	%47 = load float, ptr %46, align 4
	%48 = getelementptr float, ptr %2, i64 %43
	%49 = load float, ptr %48, align 4
	%50 = getelementptr float, ptr %3, i64 %43
	%51 = load float, ptr %50, align 4
	%52 = getelementptr float, ptr %4, i64 %43
	%53 = load float, ptr %52, align 4
	%54 = getelementptr float, ptr %5, i64 %43
	%55 = load float, ptr %54, align 4
	%56 = getelementptr float, ptr %6, i64 %43
	%57 = load float, ptr %56, align 4
	%58 = getelementptr float, ptr %7, i64 %43
	%59 = load float, ptr %58, align 4
	%60 = getelementptr float, ptr %8, i64 %43
	%61 = load float, ptr %60, align 4
	%62 = getelementptr float, ptr %9, i64 %43
	%63 = load float, ptr %62, align 4
	%64 = getelementptr float, ptr %10, i64 %43
	%65 = load float, ptr %64, align 4
	%66 = getelementptr float, ptr %11, i64 %43
	%67 = load float, ptr %66, align 4
	%68 = getelementptr float, ptr %12, i64 %43
	%69 = load float, ptr %68, align 4
	%70 = getelementptr float, ptr %13, i64 %43
	%71 = load float, ptr %70, align 4
	%72 = getelementptr float, ptr %14, i64 %43
	%73 = load float, ptr %72, align 4
	%74 = getelementptr float, ptr %15, i64 %43
	%75 = load float, ptr %74, align 4
	; Four loads from the null pointer.
	%76 = load float, ptr null, align 4
	%77 = load float, ptr null, align 4
	%78 = load float, ptr null, align 4
	%79 = load float, ptr null, align 4
	; Result 1 of 5: sum of six products, stored into %21. The stack-array
	; values %81, %84, %88, %92, %96 and %100 loaded here are reused as the
	; second factor of every product in the remaining four results.
	%80 = getelementptr float, ptr %31, i64 %42
	%81 = load float, ptr %80, align 4
	%82 = fmul contract float %45, %81
	%83 = getelementptr float, ptr %28, i64 %42
	%84 = load float, ptr %83, align 4
	%85 = fmul contract float %55, %84
	%86 = fadd contract float %82, %85
	%87 = getelementptr float, ptr %27, i64 %42
	%88 = load float, ptr %87, align 4
	%89 = fmul contract float %47, %88
	%90 = fadd contract float %86, %89
	%91 = getelementptr float, ptr %25, i64 %42
	%92 = load float, ptr %91, align 4
	%93 = fmul contract float %53, %92
	%94 = fadd contract float %90, %93
	%95 = getelementptr float, ptr %24, i64 %42
	%96 = load float, ptr %95, align 4
	%97 = fmul contract float %51, %96
	%98 = fadd contract float %94, %97
	%99 = getelementptr float, ptr %23, i64 %42
	%100 = load float, ptr %99, align 4
	%101 = fmul contract float %49, %100
	%102 = fadd contract float %98, %101
	%103 = getelementptr float, ptr %21, i64 %42
	store float %102, ptr %103, align 4
	; Result 2 of 5: stored into %18.
	%104 = fmul contract float %47, %81
	%105 = fmul contract float %65, %84
	%106 = fadd contract float %104, %105
	%107 = fmul contract float %57, %88
	%108 = fadd contract float %106, %107
	%109 = fmul contract float %63, %92
	%110 = fadd contract float %108, %109
	%111 = fmul contract float %61, %96
	%112 = fadd contract float %110, %111
	%113 = fmul contract float %59, %100
	%114 = fadd contract float %112, %113
	%115 = getelementptr float, ptr %18, i64 %42
	store float %114, ptr %115, align 4
	; Result 3 of 5: stored into %20.
	%116 = fmul contract float %55, %81
	%117 = fmul contract float %79, %84
	%118 = fadd contract float %116, %117
	%119 = fmul contract float %65, %88
	%120 = fadd contract float %118, %119
	%121 = fmul contract float %78, %92
	%122 = fadd contract float %120, %121
	%123 = fmul contract float %76, %96
	%124 = fadd contract float %122, %123
	%125 = fmul contract float %71, %100
	%126 = fadd contract float %124, %125
	%127 = getelementptr float, ptr %20, i64 %42
	store float %126, ptr %127, align 4
	; Result 4 of 5: stored into %19.
	%128 = fmul contract float %53, %81
	%129 = fmul contract float %78, %84
	%130 = fadd contract float %128, %129
	%131 = fmul contract float %63, %88
	%132 = fadd contract float %130, %131
	%133 = fmul contract float %77, %92
	%134 = fadd contract float %132, %133
	%135 = fmul contract float %75, %96
	%136 = fadd contract float %134, %135
	%137 = fmul contract float %69, %100
	%138 = fadd contract float %136, %137
	%139 = getelementptr float, ptr %19, i64 %42
	store float %138, ptr %139, align 4
	; Result 5 of 5: stored into %17.
	%140 = fmul contract float %51, %81
	%141 = fmul contract float %76, %84
	%142 = fadd contract float %140, %141
	%143 = fmul contract float %61, %88
	%144 = fadd contract float %142, %143
	%145 = fmul contract float %75, %92
	%146 = fadd contract float %144, %145
	%147 = fmul contract float %73, %96
	%148 = fadd contract float %146, %147
	%149 = fmul contract float %67, %100
	%150 = fadd contract float %148, %149
	%151 = getelementptr float, ptr %17, i64 %42
	store float %150, ptr %151, align 4
	; Inner loop latch: advance the index and decrement the trip counter.
	%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
	%152 = add nsw i64 %41, -1
	%.not145 = icmp eq i64 %152, 0
	br i1 %.not145, label %153, label %40

	153: ; preds = %40
	; Middle loop latch.
	%indvars.iv.next173 = add nuw nsw i64 %indvars.iv172, 1
	%exitcond.not = icmp eq i64 %indvars.iv.next173, 6
	br i1 %exitcond.not, label %154, label %.preheader166

	154: ; preds = %153
	; Outer loop latch: %exitcond178.not is computed but has no user; the
	; branch back to the outer header is unconditional, so the function has
	; no return.
	%indvars.iv.next176 = add nuw nsw i64 %indvars.iv175, 1
	%exitcond178.not = icmp eq i64 %indvars.iv.next176, 6
	br label %.preheader167
	}

	attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "frame-pointer"="non-leaf" "target-cpu"="generic" "target-features"="+outline-atomics,+v8a,+fp-armv8,+neon" }