| ; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s |
| ; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL |
| |
| ; Not implemented as a mir test so that changes to the generic HardwareLoops |
| ; pass can also be tested. These functions have been taken from |
| ; Transforms/HardwareLoops/loop-guards.ll, where a few test.set intrinsics |
| ; are generated, but only one (ne_trip_count) gets generated |
| ; here. Simplifications result in icmps changing and maybe also the CFG. So, |
| ; TODO: Teach the HardwareLoops pass some better pattern recognition. |
| |
| ; CHECK-GLOBAL-NOT: DoLoopStart |
| ; CHECK-GLOBAL-NOT: WhileLoopStart |
| ; CHECK-GLOBAL-NOT: LoopEnd |
| |
| ; CHECK: ne_and_guard |
| ; CHECK: body: |
| ; CHECK: bb.0.entry: |
| ; CHECK: t2CMPri renamable $lr, 0 |
| ; CHECK: tBcc %bb.3 |
| ; CHECK: bb.1.while.body.preheader: |
| ; CHECK: $lr = t2DLS renamable $lr |
| ; CHECK: bb.2.while.body: |
| ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 |
| ; Copy loop from %b to %a whose N != 0 guard is and-ed together with %t1 and |
| ; %t2 in the entry block, so one branch both guards and enters the loop. |
| define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { |
| entry: |
| %brmerge.demorgan = and i1 %t1, %t2 |
| ; Loop guard: only enter the loop when N is non-zero. |
| %cmp6 = icmp ne i32 %N, 0 |
| ; The guard is folded into the unrelated %t1/%t2 condition. |
| %or.cond = and i1 %brmerge.demorgan, %cmp6 |
| br i1 %or.cond, label %while.body, label %if.end |
| |
| while.body: ; preds = %while.body, %entry |
| %i.09 = phi i32 [ %inc, %while.body ], [ 0, %entry ] |
| %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %entry ] |
| %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %entry ] |
| ; Each iteration loads one i32 through the %b pointer, stores it through the |
| ; %a pointer, and advances both pointers by one element. |
| %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1 |
| %tmp = load i32, i32* %b.addr.07, align 4 |
| %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1 |
| store i32 %tmp, i32* %a.addr.08, align 4 |
| ; Exit once the incremented counter equals N. |
| %inc = add nuw i32 %i.09, 1 |
| %exitcond = icmp eq i32 %inc, %N |
| br i1 %exitcond, label %if.end, label %while.body |
| |
| if.end: ; preds = %while.body, %entry |
| ret void |
| } |
| |
| ; TODO: This could generate WLS |
| ; CHECK: ne_preheader |
| ; CHECK: body: |
| ; CHECK: bb.0.entry: |
| ; CHECK: t2CMPri renamable $lr, 0 |
| ; CHECK: tBcc %bb.3 |
| ; CHECK: bb.1.while.body.preheader: |
| ; CHECK: $lr = t2DLS renamable $lr |
| ; CHECK: bb.2.while.body: |
| ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 |
| ; Copy loop from %b to %a where the %t1/%t2 test is done in the entry block |
| ; and the N != 0 guard sits alone in while.preheader, directly before the loop. |
| define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { |
| entry: |
| %brmerge.demorgan = and i1 %t1, %t2 |
| br i1 %brmerge.demorgan, label %while.preheader, label %if.end |
| |
| while.preheader: ; preds = %entry |
| ; Loop guard: enter the loop when N != 0, otherwise skip to the exit. |
| %cmp = icmp ne i32 %N, 0 |
| br i1 %cmp, label %while.body, label %if.end |
| |
| while.body: ; preds = %while.body, %while.preheader |
| %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ] |
| %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ] |
| %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ] |
| ; Each iteration loads one i32 through the %b pointer, stores it through the |
| ; %a pointer, and advances both pointers by one element. |
| %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1 |
| %tmp = load i32, i32* %b.addr.07, align 4 |
| %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1 |
| store i32 %tmp, i32* %a.addr.08, align 4 |
| ; Exit once the incremented counter equals N. |
| %inc = add nuw i32 %i.09, 1 |
| %exitcond = icmp eq i32 %inc, %N |
| br i1 %exitcond, label %if.end, label %while.body |
| |
| if.end: ; preds = %while.body, %while.preheader, %entry |
| ret void |
| } |
| |
| ; TODO: This could generate WLS |
| ; CHECK: eq_preheader |
| ; CHECK: body: |
| ; CHECK: bb.0.entry: |
| ; CHECK: t2CMPri renamable $lr, 0 |
| ; CHECK: tBcc %bb.3 |
| ; CHECK: bb.1.while.body.preheader: |
| ; CHECK: $lr = t2DLS renamable $lr |
| ; CHECK: bb.2.while.body: |
| ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 |
| ; Same shape as a preheader-guarded copy loop, but the guard in |
| ; while.preheader is written as N == 0 with the branch targets swapped: the |
| ; true edge leaves for if.end and the false edge enters the loop. |
| define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { |
| entry: |
| %brmerge.demorgan = and i1 %t1, %t2 |
| br i1 %brmerge.demorgan, label %while.preheader, label %if.end |
| |
| while.preheader: ; preds = %entry |
| ; Loop guard in its inverted form: N == 0 skips the loop. |
| %cmp = icmp eq i32 %N, 0 |
| br i1 %cmp, label %if.end, label %while.body |
| |
| while.body: ; preds = %while.body, %while.preheader |
| %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ] |
| %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ] |
| %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ] |
| ; Each iteration loads one i32 through the %b pointer, stores it through the |
| ; %a pointer, and advances both pointers by one element. |
| %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1 |
| %tmp = load i32, i32* %b.addr.07, align 4 |
| %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1 |
| store i32 %tmp, i32* %a.addr.08, align 4 |
| ; Exit once the incremented counter equals N. |
| %inc = add nuw i32 %i.09, 1 |
| %exitcond = icmp eq i32 %inc, %N |
| br i1 %exitcond, label %if.end, label %while.body |
| |
| if.end: ; preds = %while.body, %while.preheader, %entry |
| ret void |
| } |
| |
| ; TODO: This could generate WLS |
| ; CHECK: ne_prepreheader |
| ; CHECK: body: |
| ; CHECK: bb.0.entry: |
| ; CHECK: t2CMPri renamable $lr, 0 |
| ; CHECK: tBcc %bb.3 |
| ; CHECK: bb.1.while.body.preheader: |
| ; CHECK: $lr = t2DLS renamable $lr |
| ; CHECK: bb.2.while.body: |
| ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 |
| ; Copy loop from %b to %a where the N != 0 guard is tested first, in the entry |
| ; block, and the unrelated %t1/%t2 test sits between the guard and the loop. |
| define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { |
| entry: |
| ; Loop guard, one block earlier than the block that branches into the loop. |
| %cmp = icmp ne i32 %N, 0 |
| br i1 %cmp, label %while.preheader, label %if.end |
| |
| while.preheader: ; preds = %entry |
| %brmerge.demorgan = and i1 %t1, %t2 |
| br i1 %brmerge.demorgan, label %while.body, label %if.end |
| |
| while.body: ; preds = %while.body, %while.preheader |
| %i.09 = phi i32 [ %inc, %while.body ], [ 0, %while.preheader ] |
| %a.addr.08 = phi i32* [ %incdec.ptr3, %while.body ], [ %a, %while.preheader ] |
| %b.addr.07 = phi i32* [ %incdec.ptr, %while.body ], [ %b, %while.preheader ] |
| ; Each iteration loads one i32 through the %b pointer, stores it through the |
| ; %a pointer, and advances both pointers by one element. |
| %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.07, i32 1 |
| %tmp = load i32, i32* %b.addr.07, align 4 |
| %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.08, i32 1 |
| store i32 %tmp, i32* %a.addr.08, align 4 |
| ; Exit once the incremented counter equals N. |
| %inc = add nuw i32 %i.09, 1 |
| %exitcond = icmp eq i32 %inc, %N |
| br i1 %exitcond, label %if.end, label %while.body |
| |
| if.end: ; preds = %while.body, %while.preheader, %entry |
| ret void |
| } |
| |
| ; CHECK: be_ne |
| ; CHECK: body: |
| ; CHECK: bb.0.entry: |
| ; CHECK: $lr = t2DLS renamable $lr |
| ; CHECK: bb.1.do.body: |
| ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.1 |
| ; Do-while style copy loop from %b to %a whose entry guard tests a |
| ; select-derived value (%be) against zero instead of testing N directly. |
| ; NOTE(review): as written, %be is 0 when N != 0 and N - 1 otherwise, so the |
| ; guard only passes when N == 0. Confirm this matches the input in |
| ; Transforms/HardwareLoops/loop-guards.ll before changing the select. |
| define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { |
| entry: |
| %cmp = icmp ne i32 %N, 0 |
| %sub = sub i32 %N, 1 |
| %be = select i1 %cmp, i32 0, i32 %sub |
| ; Loop guard on the selected value rather than on N itself. |
| %cmp.1 = icmp ne i32 %be, 0 |
| br i1 %cmp.1, label %do.body, label %if.end |
| |
| do.body: ; preds = %do.body, %entry |
| %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %entry ] |
| %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %entry ] |
| %i.0 = phi i32 [ %inc, %do.body ], [ 0, %entry ] |
| ; Each iteration loads one i32 through the %b pointer, stores it through the |
| ; %a pointer, and advances both pointers by one element. |
| %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1 |
| %tmp = load i32, i32* %b.addr.0, align 4 |
| %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1 |
| store i32 %tmp, i32* %a.addr.0, align 4 |
| ; Keep looping while the incremented counter is unsigned-less-than N. |
| %inc = add nuw i32 %i.0, 1 |
| %cmp.2 = icmp ult i32 %inc, %N |
| br i1 %cmp.2, label %do.body, label %if.end |
| |
| if.end: ; preds = %do.body, %entry |
| ret void |
| } |
| |
| ; TODO: Remove the tMOVr in the preheader! |
| ; CHECK: ne_trip_count |
| ; CHECK: body: |
| ; CHECK: bb.0.entry: |
| ; CHECK: $lr = t2WLS $r3, %bb.3 |
| ; CHECK: bb.1.do.body.preheader: |
| ; CHECK: $lr = tMOVr |
| ; CHECK: bb.2.do.body: |
| ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.2 |
| ; Do-while style copy loop from %b to %a whose N != 0 guard lives in a |
| ; dedicated do.body.preheader block that the entry block jumps to |
| ; unconditionally. The %t1 parameter is not used in the body. |
| define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) { |
| entry: |
| br label %do.body.preheader |
| |
| do.body.preheader: ; preds = %entry |
| ; Loop guard: enter the loop when N != 0, otherwise skip to the exit. |
| %cmp = icmp ne i32 %N, 0 |
| br i1 %cmp, label %do.body, label %if.end |
| |
| do.body: ; preds = %do.body, %do.body.preheader |
| %b.addr.0 = phi i32* [ %incdec.ptr, %do.body ], [ %b, %do.body.preheader ] |
| %a.addr.0 = phi i32* [ %incdec.ptr3, %do.body ], [ %a, %do.body.preheader ] |
| %i.0 = phi i32 [ %inc, %do.body ], [ 0, %do.body.preheader ] |
| ; Each iteration loads one i32 through the %b pointer, stores it through the |
| ; %a pointer, and advances both pointers by one element. |
| %incdec.ptr = getelementptr inbounds i32, i32* %b.addr.0, i32 1 |
| %tmp = load i32, i32* %b.addr.0, align 4 |
| %incdec.ptr3 = getelementptr inbounds i32, i32* %a.addr.0, i32 1 |
| store i32 %tmp, i32* %a.addr.0, align 4 |
| ; Keep looping while the incremented counter is unsigned-less-than N. |
| %inc = add nuw i32 %i.0, 1 |
| %cmp.1 = icmp ult i32 %inc, %N |
| br i1 %cmp.1, label %do.body, label %if.end |
| |
| if.end: ; preds = %do.body, %do.body.preheader |
| ret void |
| } |