| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+lob --verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOMVE |
| ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+lob,+mve --verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVE |
| |
| ; Check that loop strength reduction understands that a sub can be folded into |
| ; an le instruction and reduces the cost accordingly, so that it picks the |
| ; le-based loop form no matter which addressing mode is preferred. |
| |
| ; @test: two nested counted loops with no call in the inner loop. Each inner |
| ; iteration stores an i16 zero to dst[n + idx_inner]; both loops exit when their |
| ; incremented index equals n. The expected output uses dls/le for the inner |
| ; loop in both RUN configurations; only the store addressing differs |
| ; (pre-indexed without MVE, post-indexed with MVE). |
| define void @test(ptr %dst, i32 %n) { |
| ; CHECK-NOMVE-LABEL: test: |
| ; CHECK-NOMVE: @ %bb.0: @ %entry |
| ; CHECK-NOMVE-NEXT: push {r7, lr} |
| ; CHECK-NOMVE-NEXT: add.w r0, r0, r1, lsl #1 |
| ; CHECK-NOMVE-NEXT: movs r2, #0 |
| ; CHECK-NOMVE-NEXT: sub.w r12, r0, #2 |
| ; CHECK-NOMVE-NEXT: movs r3, #0 |
| ; CHECK-NOMVE-NEXT: .LBB0_1: @ %outer_loop |
| ; CHECK-NOMVE-NEXT: @ =>This Loop Header: Depth=1 |
| ; CHECK-NOMVE-NEXT: @ Child Loop BB0_2 Depth 2 |
| ; CHECK-NOMVE-NEXT: dls lr, r1 |
| ; CHECK-NOMVE-NEXT: mov r0, r12 |
| ; CHECK-NOMVE-NEXT: .LBB0_2: @ %inner_loop |
| ; CHECK-NOMVE-NEXT: @ Parent Loop BB0_1 Depth=1 |
| ; CHECK-NOMVE-NEXT: @ => This Inner Loop Header: Depth=2 |
| ; CHECK-NOMVE-NEXT: strh r2, [r0, #2]! |
| ; CHECK-NOMVE-NEXT: le lr, .LBB0_2 |
| ; CHECK-NOMVE-NEXT: @ %bb.3: @ %outer_loop_end |
| ; CHECK-NOMVE-NEXT: @ in Loop: Header=BB0_1 Depth=1 |
| ; CHECK-NOMVE-NEXT: adds r3, #1 |
| ; CHECK-NOMVE-NEXT: cmp r3, r1 |
| ; CHECK-NOMVE-NEXT: it eq |
| ; CHECK-NOMVE-NEXT: popeq {r7, pc} |
| ; CHECK-NOMVE-NEXT: b .LBB0_1 |
| ; |
| ; CHECK-MVE-LABEL: test: |
| ; CHECK-MVE: @ %bb.0: @ %entry |
| ; CHECK-MVE-NEXT: push {r7, lr} |
| ; CHECK-MVE-NEXT: add.w r12, r0, r1, lsl #1 |
| ; CHECK-MVE-NEXT: movs r2, #0 |
| ; CHECK-MVE-NEXT: movs r3, #0 |
| ; CHECK-MVE-NEXT: .LBB0_1: @ %outer_loop |
| ; CHECK-MVE-NEXT: @ =>This Loop Header: Depth=1 |
| ; CHECK-MVE-NEXT: @ Child Loop BB0_2 Depth 2 |
| ; CHECK-MVE-NEXT: dls lr, r1 |
| ; CHECK-MVE-NEXT: mov r0, r12 |
| ; CHECK-MVE-NEXT: .LBB0_2: @ %inner_loop |
| ; CHECK-MVE-NEXT: @ Parent Loop BB0_1 Depth=1 |
| ; CHECK-MVE-NEXT: @ => This Inner Loop Header: Depth=2 |
| ; CHECK-MVE-NEXT: strh r2, [r0], #2 |
| ; CHECK-MVE-NEXT: le lr, .LBB0_2 |
| ; CHECK-MVE-NEXT: @ %bb.3: @ %outer_loop_end |
| ; CHECK-MVE-NEXT: @ in Loop: Header=BB0_1 Depth=1 |
| ; CHECK-MVE-NEXT: adds r3, #1 |
| ; CHECK-MVE-NEXT: cmp r3, r1 |
| ; CHECK-MVE-NEXT: it eq |
| ; CHECK-MVE-NEXT: popeq {r7, pc} |
| ; CHECK-MVE-NEXT: b .LBB0_1 |
| entry: |
| br label %outer_loop |
| |
| outer_loop: |
| ; Outer induction variable: starts at 0 and is bumped in %outer_loop_end. |
| %idx_outer = phi i32 [ %idx_outer.inc, %outer_loop_end ], [ 0, %entry ] |
| br label %inner_loop |
| |
| inner_loop: |
| ; Inner induction variable: restarts at 0 on every outer iteration. |
| %idx_inner = phi i32 [ 0, %outer_loop ], [ %idx_inner.inc, %inner_loop ] |
| ; The store index is offset by the loop bound, i.e. the element written is |
| ; dst[n + idx_inner]. |
| %add = add i32 %idx_inner, %n |
| %gep = getelementptr inbounds i16, ptr %dst, i32 %add |
| store i16 0, ptr %gep, align 2 |
| %idx_inner.inc = add nuw nsw i32 %idx_inner, 1 |
| ; Leave the inner loop once the incremented index equals n. |
| %cond_inner = icmp eq i32 %idx_inner.inc, %n |
| br i1 %cond_inner, label %outer_loop_end, label %inner_loop |
| |
| outer_loop_end: |
| ; The outer loop uses the same bound n as the inner loop. |
| %idx_outer.inc = add nuw i32 %idx_outer, 1 |
| %cond_outer = icmp eq i32 %idx_outer.inc, %n |
| br i1 %cond_outer, label %exit, label %outer_loop |
| |
| exit: |
| ret void |
| } |
| |
| ; @test_optsize: the same loop nest as @test (inner loop stores an i16 zero to |
| ; dst[n + idx_inner], no call), but the function carries the optsize attribute. |
| ; Both RUN configurations share one set of expected output here: a post-indexed |
| ; strh followed by le in the inner loop. |
| define void @test_optsize(ptr %dst, i32 %n) optsize { |
| ; CHECK-LABEL: test_optsize: |
| ; CHECK: @ %bb.0: @ %entry |
| ; CHECK-NEXT: push {r7, lr} |
| ; CHECK-NEXT: add.w r12, r0, r1, lsl #1 |
| ; CHECK-NEXT: movs r2, #0 |
| ; CHECK-NEXT: movs r3, #0 |
| ; CHECK-NEXT: .LBB1_1: @ %outer_loop |
| ; CHECK-NEXT: @ =>This Loop Header: Depth=1 |
| ; CHECK-NEXT: @ Child Loop BB1_2 Depth 2 |
| ; CHECK-NEXT: dls lr, r1 |
| ; CHECK-NEXT: mov r0, r12 |
| ; CHECK-NEXT: .LBB1_2: @ %inner_loop |
| ; CHECK-NEXT: @ Parent Loop BB1_1 Depth=1 |
| ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 |
| ; CHECK-NEXT: strh r2, [r0], #2 |
| ; CHECK-NEXT: le lr, .LBB1_2 |
| ; CHECK-NEXT: @ %bb.3: @ %outer_loop_end |
| ; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1 |
| ; CHECK-NEXT: adds r3, #1 |
| ; CHECK-NEXT: cmp r3, r1 |
| ; CHECK-NEXT: it eq |
| ; CHECK-NEXT: popeq {r7, pc} |
| ; CHECK-NEXT: b .LBB1_1 |
| entry: |
| br label %outer_loop |
| |
| outer_loop: |
| ; Outer induction variable: starts at 0 and is bumped in %outer_loop_end. |
| %idx_outer = phi i32 [ %idx_outer.inc, %outer_loop_end ], [ 0, %entry ] |
| br label %inner_loop |
| |
| inner_loop: |
| ; Inner induction variable: restarts at 0 on every outer iteration. |
| %idx_inner = phi i32 [ 0, %outer_loop ], [ %idx_inner.inc, %inner_loop ] |
| ; The store index is offset by the loop bound, i.e. the element written is |
| ; dst[n + idx_inner]. |
| %add = add i32 %idx_inner, %n |
| %gep = getelementptr inbounds i16, ptr %dst, i32 %add |
| store i16 0, ptr %gep, align 2 |
| %idx_inner.inc = add nuw nsw i32 %idx_inner, 1 |
| ; Leave the inner loop once the incremented index equals n. |
| %cond_inner = icmp eq i32 %idx_inner.inc, %n |
| br i1 %cond_inner, label %outer_loop_end, label %inner_loop |
| |
| outer_loop_end: |
| ; The outer loop uses the same bound n as the inner loop. |
| %idx_outer.inc = add nuw i32 %idx_outer, 1 |
| %cond_outer = icmp eq i32 %idx_outer.inc, %n |
| br i1 %cond_outer, label %exit, label %outer_loop |
| |
| exit: |
| ret void |
| } |
| |
| ; Check that when we can't use LE we don't discount the cost of a sub |
| ; instruction, so we only get the sub when postincrement is the preferred |
| ; addressing mode (i.e. when we have MVE). |
| |
| ; External function with no body in this file; it is called from the inner |
| ; loops of @test_no_le and @test_no_le_optsize. |
| declare void @otherfn() |
| |
| ; @test_no_le: nested counted loops whose inner loop calls @otherfn before |
| ; storing an i16 zero to dst[n + idx_inner]. The expected output closes the |
| ; inner loop with a conditional bne rather than dls/le. Without MVE the store |
| ; is register-offset indexed and the counter is an adds/cmp pair; with MVE the |
| ; store is post-indexed and the counter is a single subs. |
| define void @test_no_le(ptr %dst, i32 %n) { |
| ; CHECK-NOMVE-LABEL: test_no_le: |
| ; CHECK-NOMVE: @ %bb.0: @ %entry |
| ; CHECK-NOMVE-NEXT: push.w {r4, r5, r6, r7, r8, lr} |
| ; CHECK-NOMVE-NEXT: add.w r5, r0, r1, lsl #1 |
| ; CHECK-NOMVE-NEXT: mov r4, r1 |
| ; CHECK-NOMVE-NEXT: movs r6, #0 |
| ; CHECK-NOMVE-NEXT: mov.w r8, #0 |
| ; CHECK-NOMVE-NEXT: .LBB2_1: @ %outer_loop |
| ; CHECK-NOMVE-NEXT: @ =>This Loop Header: Depth=1 |
| ; CHECK-NOMVE-NEXT: @ Child Loop BB2_2 Depth 2 |
| ; CHECK-NOMVE-NEXT: movs r7, #0 |
| ; CHECK-NOMVE-NEXT: .LBB2_2: @ %inner_loop |
| ; CHECK-NOMVE-NEXT: @ Parent Loop BB2_1 Depth=1 |
| ; CHECK-NOMVE-NEXT: @ => This Inner Loop Header: Depth=2 |
| ; CHECK-NOMVE-NEXT: bl otherfn |
| ; CHECK-NOMVE-NEXT: strh.w r6, [r5, r7, lsl #1] |
| ; CHECK-NOMVE-NEXT: adds r7, #1 |
| ; CHECK-NOMVE-NEXT: cmp r4, r7 |
| ; CHECK-NOMVE-NEXT: bne .LBB2_2 |
| ; CHECK-NOMVE-NEXT: @ %bb.3: @ %outer_loop_end |
| ; CHECK-NOMVE-NEXT: @ in Loop: Header=BB2_1 Depth=1 |
| ; CHECK-NOMVE-NEXT: add.w r8, r8, #1 |
| ; CHECK-NOMVE-NEXT: cmp r8, r4 |
| ; CHECK-NOMVE-NEXT: bne .LBB2_1 |
| ; CHECK-NOMVE-NEXT: @ %bb.4: @ %exit |
| ; CHECK-NOMVE-NEXT: pop.w {r4, r5, r6, r7, r8, pc} |
| ; |
| ; CHECK-MVE-LABEL: test_no_le: |
| ; CHECK-MVE: @ %bb.0: @ %entry |
| ; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} |
| ; CHECK-MVE-NEXT: sub sp, #4 |
| ; CHECK-MVE-NEXT: add.w r8, r0, r1, lsl #1 |
| ; CHECK-MVE-NEXT: mov r9, r1 |
| ; CHECK-MVE-NEXT: movs r6, #0 |
| ; CHECK-MVE-NEXT: movs r7, #0 |
| ; CHECK-MVE-NEXT: .LBB2_1: @ %outer_loop |
| ; CHECK-MVE-NEXT: @ =>This Loop Header: Depth=1 |
| ; CHECK-MVE-NEXT: @ Child Loop BB2_2 Depth 2 |
| ; CHECK-MVE-NEXT: mov r5, r8 |
| ; CHECK-MVE-NEXT: mov r4, r9 |
| ; CHECK-MVE-NEXT: .LBB2_2: @ %inner_loop |
| ; CHECK-MVE-NEXT: @ Parent Loop BB2_1 Depth=1 |
| ; CHECK-MVE-NEXT: @ => This Inner Loop Header: Depth=2 |
| ; CHECK-MVE-NEXT: bl otherfn |
| ; CHECK-MVE-NEXT: strh r6, [r5], #2 |
| ; CHECK-MVE-NEXT: subs r4, #1 |
| ; CHECK-MVE-NEXT: bne .LBB2_2 |
| ; CHECK-MVE-NEXT: @ %bb.3: @ %outer_loop_end |
| ; CHECK-MVE-NEXT: @ in Loop: Header=BB2_1 Depth=1 |
| ; CHECK-MVE-NEXT: adds r7, #1 |
| ; CHECK-MVE-NEXT: cmp r7, r9 |
| ; CHECK-MVE-NEXT: bne .LBB2_1 |
| ; CHECK-MVE-NEXT: @ %bb.4: @ %exit |
| ; CHECK-MVE-NEXT: add sp, #4 |
| ; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} |
| entry: |
| br label %outer_loop |
| |
| outer_loop: |
| ; Outer induction variable: starts at 0 and is bumped in %outer_loop_end. |
| %idx_outer = phi i32 [ %idx_outer.inc, %outer_loop_end ], [ 0, %entry ] |
| br label %inner_loop |
| |
| inner_loop: |
| ; Inner induction variable: restarts at 0 on every outer iteration. |
| %idx_inner = phi i32 [ 0, %outer_loop ], [ %idx_inner.inc, %inner_loop ] |
| ; The call is the only difference between this loop body and the one in @test. |
| call void @otherfn() |
| ; The store index is offset by the loop bound, i.e. the element written is |
| ; dst[n + idx_inner]. |
| %add = add i32 %idx_inner, %n |
| %gep = getelementptr inbounds i16, ptr %dst, i32 %add |
| store i16 0, ptr %gep, align 2 |
| %idx_inner.inc = add nuw nsw i32 %idx_inner, 1 |
| ; Leave the inner loop once the incremented index equals n. |
| %cond_inner = icmp eq i32 %idx_inner.inc, %n |
| br i1 %cond_inner, label %outer_loop_end, label %inner_loop |
| |
| outer_loop_end: |
| ; The outer loop uses the same bound n as the inner loop. |
| %idx_outer.inc = add nuw i32 %idx_outer, 1 |
| %cond_outer = icmp eq i32 %idx_outer.inc, %n |
| br i1 %cond_outer, label %exit, label %outer_loop |
| |
| exit: |
| ret void |
| } |
| |
| ; @test_no_le_optsize: the same loop nest as @test_no_le (inner loop calls |
| ; @otherfn, then stores an i16 zero to dst[n + idx_inner]), but the function |
| ; carries the optsize attribute. The expected output for each RUN configuration |
| ; matches that of @test_no_le apart from the basic-block label numbers. |
| define void @test_no_le_optsize(ptr %dst, i32 %n) optsize { |
| ; CHECK-NOMVE-LABEL: test_no_le_optsize: |
| ; CHECK-NOMVE: @ %bb.0: @ %entry |
| ; CHECK-NOMVE-NEXT: push.w {r4, r5, r6, r7, r8, lr} |
| ; CHECK-NOMVE-NEXT: add.w r5, r0, r1, lsl #1 |
| ; CHECK-NOMVE-NEXT: mov r4, r1 |
| ; CHECK-NOMVE-NEXT: movs r6, #0 |
| ; CHECK-NOMVE-NEXT: mov.w r8, #0 |
| ; CHECK-NOMVE-NEXT: .LBB3_1: @ %outer_loop |
| ; CHECK-NOMVE-NEXT: @ =>This Loop Header: Depth=1 |
| ; CHECK-NOMVE-NEXT: @ Child Loop BB3_2 Depth 2 |
| ; CHECK-NOMVE-NEXT: movs r7, #0 |
| ; CHECK-NOMVE-NEXT: .LBB3_2: @ %inner_loop |
| ; CHECK-NOMVE-NEXT: @ Parent Loop BB3_1 Depth=1 |
| ; CHECK-NOMVE-NEXT: @ => This Inner Loop Header: Depth=2 |
| ; CHECK-NOMVE-NEXT: bl otherfn |
| ; CHECK-NOMVE-NEXT: strh.w r6, [r5, r7, lsl #1] |
| ; CHECK-NOMVE-NEXT: adds r7, #1 |
| ; CHECK-NOMVE-NEXT: cmp r4, r7 |
| ; CHECK-NOMVE-NEXT: bne .LBB3_2 |
| ; CHECK-NOMVE-NEXT: @ %bb.3: @ %outer_loop_end |
| ; CHECK-NOMVE-NEXT: @ in Loop: Header=BB3_1 Depth=1 |
| ; CHECK-NOMVE-NEXT: add.w r8, r8, #1 |
| ; CHECK-NOMVE-NEXT: cmp r8, r4 |
| ; CHECK-NOMVE-NEXT: bne .LBB3_1 |
| ; CHECK-NOMVE-NEXT: @ %bb.4: @ %exit |
| ; CHECK-NOMVE-NEXT: pop.w {r4, r5, r6, r7, r8, pc} |
| ; |
| ; CHECK-MVE-LABEL: test_no_le_optsize: |
| ; CHECK-MVE: @ %bb.0: @ %entry |
| ; CHECK-MVE-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} |
| ; CHECK-MVE-NEXT: sub sp, #4 |
| ; CHECK-MVE-NEXT: add.w r8, r0, r1, lsl #1 |
| ; CHECK-MVE-NEXT: mov r9, r1 |
| ; CHECK-MVE-NEXT: movs r6, #0 |
| ; CHECK-MVE-NEXT: movs r7, #0 |
| ; CHECK-MVE-NEXT: .LBB3_1: @ %outer_loop |
| ; CHECK-MVE-NEXT: @ =>This Loop Header: Depth=1 |
| ; CHECK-MVE-NEXT: @ Child Loop BB3_2 Depth 2 |
| ; CHECK-MVE-NEXT: mov r5, r8 |
| ; CHECK-MVE-NEXT: mov r4, r9 |
| ; CHECK-MVE-NEXT: .LBB3_2: @ %inner_loop |
| ; CHECK-MVE-NEXT: @ Parent Loop BB3_1 Depth=1 |
| ; CHECK-MVE-NEXT: @ => This Inner Loop Header: Depth=2 |
| ; CHECK-MVE-NEXT: bl otherfn |
| ; CHECK-MVE-NEXT: strh r6, [r5], #2 |
| ; CHECK-MVE-NEXT: subs r4, #1 |
| ; CHECK-MVE-NEXT: bne .LBB3_2 |
| ; CHECK-MVE-NEXT: @ %bb.3: @ %outer_loop_end |
| ; CHECK-MVE-NEXT: @ in Loop: Header=BB3_1 Depth=1 |
| ; CHECK-MVE-NEXT: adds r7, #1 |
| ; CHECK-MVE-NEXT: cmp r7, r9 |
| ; CHECK-MVE-NEXT: bne .LBB3_1 |
| ; CHECK-MVE-NEXT: @ %bb.4: @ %exit |
| ; CHECK-MVE-NEXT: add sp, #4 |
| ; CHECK-MVE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} |
| entry: |
| br label %outer_loop |
| |
| outer_loop: |
| ; Outer induction variable: starts at 0 and is bumped in %outer_loop_end. |
| %idx_outer = phi i32 [ %idx_outer.inc, %outer_loop_end ], [ 0, %entry ] |
| br label %inner_loop |
| |
| inner_loop: |
| ; Inner induction variable: restarts at 0 on every outer iteration. |
| %idx_inner = phi i32 [ 0, %outer_loop ], [ %idx_inner.inc, %inner_loop ] |
| ; The call is the only difference between this loop body and the one in |
| ; @test_optsize. |
| call void @otherfn() |
| ; The store index is offset by the loop bound, i.e. the element written is |
| ; dst[n + idx_inner]. |
| %add = add i32 %idx_inner, %n |
| %gep = getelementptr inbounds i16, ptr %dst, i32 %add |
| store i16 0, ptr %gep, align 2 |
| %idx_inner.inc = add nuw nsw i32 %idx_inner, 1 |
| ; Leave the inner loop once the incremented index equals n. |
| %cond_inner = icmp eq i32 %idx_inner.inc, %n |
| br i1 %cond_inner, label %outer_loop_end, label %inner_loop |
| |
| outer_loop_end: |
| ; The outer loop uses the same bound n as the inner loop. |
| %idx_outer.inc = add nuw i32 %idx_outer, 1 |
| %cond_outer = icmp eq i32 %idx_outer.inc, %n |
| br i1 %cond_outer, label %exit, label %outer_loop |
| |
| exit: |
| ret void |
| } |