| # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py |
| # RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mcpu=cortex-m85 -mattr=+use-mipipeliner -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK |
| |
| --- | |
| ; Reference IR: a dot product over %sz floats. %sum.010 accumulates |
| ; fmul/fadd (fast-math) per iteration; LSR has rewritten the pointers as |
| ; pre-incremented induction variables (%lsr.iv5/%lsr.iv1, base - 1). |
| define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 { |
| entry: |
| %cmp8 = icmp sgt i32 %sz, 0 |
| br i1 %cmp8, label %for.body.preheader, label %for.end |
| |
| for.body.preheader: ; preds = %entry |
| %scevgep = getelementptr float, float* %b, i32 -1 |
| %scevgep4 = getelementptr float, float* %a, i32 -1 |
| br label %for.body |
| |
| for.body: ; preds = %for.body.preheader, %for.body |
| %lsr.iv5 = phi float* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ] |
| %lsr.iv1 = phi float* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ] |
| %lsr.iv = phi i32 [ %sz, %for.body.preheader ], [ %lsr.iv.next, %for.body ] |
| %sum.010 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ] |
| %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1 |
| %0 = load float, float* %scevgep7, align 4 |
| %scevgep3 = getelementptr float, float* %lsr.iv1, i32 1 |
| %1 = load float, float* %scevgep3, align 4 |
| %mul = fmul fast float %1, %0 |
| %add = fadd fast float %mul, %sum.010 |
| %lsr.iv.next = add i32 %lsr.iv, -1 |
| %scevgep2 = getelementptr float, float* %lsr.iv1, i32 1 |
| %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1 |
| %exitcond.not = icmp eq i32 %lsr.iv.next, 0 |
| br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0 |
| |
| for.end: ; preds = %for.body, %entry |
| %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ] |
| ret float %sum.0.lcssa |
| } |
| |
| ; Loop metadata: unrolling disabled, and software pipelining requested |
| ; with an initiation interval (II) of 3 (!3). |
| !0 = distinct !{!0, !1, !2, !3} |
| !1 = !{!"llvm.loop.mustprogress"} |
| !2 = !{!"llvm.loop.unroll.disable"} |
| !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3} |
| |
| ... |
| --- |
| # Machine function under test; fed to -run-pass=pipeliner (see RUN line). |
| # constant #0 is the float 0.0 used to seed the accumulator via VLDRS. |
| name: dot |
| alignment: 2 |
| tracksRegLiveness: true |
| constants: |
| - id: 0 |
| value: 'float 0.000000e+00' |
| alignment: 4 |
| isTargetSpecific: false |
| body: | |
| ; Expected output (autogenerated by update_mir_test_checks.py — do not |
| ; edit CHECK lines by hand; rerun the script instead): the pipeliner |
| ; splits the loop into a guarded prologue block (bb.5) that issues the |
| ; first loads/multiply, a pipelined kernel (bb.6) overlapping the next |
| ; iteration's loads with the current VMULS/VADDS, and an epilogue (bb.7) |
| ; that performs the final VADDS before joining for.end. |
| ; CHECK-LABEL: name: dot |
| ; CHECK: bb.0.entry: |
| ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000) |
| ; CHECK-NEXT: liveins: $r0, $r1, $r2 |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprlr = COPY $r2 |
| ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnopc = COPY $r1 |
| ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnopc = COPY $r0 |
| ; CHECK-NEXT: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr |
| ; CHECK-NEXT: t2Bcc %bb.2, 10 /* CC::ge */, $cpsr |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.1: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) |
| ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.2.for.body.preheader: |
| ; CHECK-NEXT: successors: %bb.5(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[COPY1]], 4, 14 /* CC::al */, $noreg, $noreg |
| ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprnopc = COPY [[t2SUBri]] |
| ; CHECK-NEXT: [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[COPY2]], 4, 14 /* CC::al */, $noreg, $noreg |
| ; CHECK-NEXT: [[VLDRS1:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) |
| ; CHECK-NEXT: [[t2DoLoopStart:%[0-9]+]]:gprlr = t2DoLoopStart [[COPY]] |
| ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gprnopc = COPY [[t2SUBri1]] |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.5.for.body: |
| ; CHECK-NEXT: successors: %bb.6(0x80000000), %bb.7(0x00000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY4]], 4, 14 /* CC::al */, $noreg, $noreg |
| ; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) |
| ; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg |
| ; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) |
| ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprlr = COPY [[t2DoLoopStart]] |
| ; CHECK-NEXT: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[COPY5]], 1 |
| ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY [[t2LoopDec]] |
| ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr = COPY [[t2ADDri1]] |
| ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2ADDri]] |
| ; CHECK-NEXT: t2CMPri [[t2LoopDec]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr |
| ; CHECK-NEXT: t2Bcc %bb.7, 0 /* CC::eq */, $cpsr |
| ; CHECK-NEXT: t2B %bb.6, 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.6.for.body: |
| ; CHECK-NEXT: successors: %bb.7(0x04000000), %bb.6(0x7c000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.5, %43, %bb.6 |
| ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.5, %44, %bb.6 |
| ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gpr = PHI [[COPY6]], %bb.5, %47, %bb.6 |
| ; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, %46, %bb.6 |
| ; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, %45, %bb.6 |
| ; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4) |
| ; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4) |
| ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gprlr = COPY [[PHI2]] |
| ; CHECK-NEXT: [[t2LoopDec1:%[0-9]+]]:gprlr = t2LoopDec [[COPY9]], 1 |
| ; CHECK-NEXT: [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg |
| ; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg |
| ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]] |
| ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri3]] |
| ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI4]], [[PHI3]], 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2LoopDec1]] |
| ; CHECK-NEXT: t2LoopEnd [[t2LoopDec1]], %bb.6, implicit-def $cpsr |
| ; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.7: |
| ; CHECK-NEXT: successors: %bb.4(0x80000000) |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS]], %bb.6 |
| ; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[VMULS1]], %bb.6 |
| ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI5]], 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: {{ $}} |
| ; CHECK-NEXT: bb.4.for.end: |
| ; CHECK-NEXT: [[PHI7:%[0-9]+]]:spr = PHI [[VLDRS]], %bb.1, [[VADDS1]], %bb.7 |
| ; CHECK-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS [[PHI7]], 14 /* CC::al */, $noreg |
| ; CHECK-NEXT: $r0 = COPY [[VMOVRS]] |
| ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit $r0 |
| ; Input MIR below: the dot-product kernel expressed with the low-overhead |
| ; loop pseudos t2DoLoopStart / t2LoopDec / t2LoopEnd; %5/%6 carry the |
| ; float accumulator seeded from constant-pool 0.0 (%15 / %14). |
| bb.0.entry: |
| successors: %bb.1(0x50000000), %bb.4(0x30000000) |
| liveins: $r0, $r1, $r2 |
| |
| %13:gprlr = COPY $r2 |
| %12:gprnopc = COPY $r1 |
| %11:gprnopc = COPY $r0 |
| t2CMPri %13, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr |
| t2Bcc %bb.1, 10 /* CC::ge */, $cpsr |
| |
| bb.4: |
| successors: %bb.3(0x80000000) |
| |
| %14:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) |
| t2B %bb.3, 14 /* CC::al */, $noreg |
| |
| bb.1.for.body.preheader: |
| successors: %bb.2(0x80000000) |
| |
| %16:rgpr = t2SUBri %12, 4, 14 /* CC::al */, $noreg, $noreg |
| %0:gpr = COPY %16 |
| %17:rgpr = t2SUBri %11, 4, 14 /* CC::al */, $noreg, $noreg |
| %15:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool) |
| %44:gprlr = t2DoLoopStart %13 |
| %1:gpr = COPY %17 |
| |
| bb.2.for.body: |
| successors: %bb.3(0x04000000), %bb.2(0x7c000000) |
| |
| %2:gprnopc = PHI %1, %bb.1, %9, %bb.2 |
| %3:gprnopc = PHI %0, %bb.1, %8, %bb.2 |
| %4:gpr = PHI %44, %bb.1, %7, %bb.2 |
| %5:spr = PHI %15, %bb.1, %6, %bb.2 |
| %18:rgpr = t2ADDri %2, 4, 14 /* CC::al */, $noreg, $noreg |
| %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7) |
| %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg |
| %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3) |
| %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg |
| %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg |
| %42:gprlr = COPY %4 |
| %23:gprlr = t2LoopDec %42:gprlr, 1 |
| %7:gpr = COPY %23 |
| %8:gpr = COPY %20 |
| %9:gpr = COPY %18 |
| t2LoopEnd %23:gprlr, %bb.2, implicit-def dead $cpsr |
| t2B %bb.3, 14 /* CC::al */, $noreg |
| |
| bb.3.for.end: |
| %10:spr = PHI %14, %bb.4, %6, %bb.2 |
| %24:gpr = VMOVRS %10, 14 /* CC::al */, $noreg |
| $r0 = COPY %24 |
| tBX_RET 14 /* CC::al */, $noreg, implicit $r0 |
| |
| ... |