| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
| ; RUN: llc -mtriple=hexagon < %s | FileCheck %s |
| |
| ; In this testcase, when loads were moved close to users, they were actually |
| ; moved right before the consuming stores. This was after the store group |
| ; was moved, so the loads and stores ended up being interleaved. This violated |
| ; the assumption in store realigning that all loads were available before the |
| ; first store, causing some code that depended on the loads to be inserted |
| ; before the loads it used. |
| ; Just make sure that this compiles ok. |
| |
| ; Function Attrs: nounwind. Returns at once if %a2 == 0; otherwise loops in %b2, copying four <16 x i32> vectors per iteration from %a0 to %a1. |
| define void @f0(ptr noalias nocapture readonly %a0, ptr noalias nocapture %a1, i32 %a2) #0 { |
| ; CHECK-LABEL: f0: |
| ; CHECK: // %bb.0: // %b0 |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: p0 = cmp.eq(r2,#0) |
| ; CHECK-NEXT: if (p0.new) jumpr:nt r31 |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: .p2align 4 |
| ; CHECK-NEXT: .LBB0_1: // %b2 |
| ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: v0.cur = vmem(r0+#0) |
| ; CHECK-NEXT: vmem(r1+#0) = v0 |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: v29.cur = vmem(r0+#1) |
| ; CHECK-NEXT: vmem(r1+#1) = v29 |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: v30.cur = vmem(r0+#2) |
| ; CHECK-NEXT: vmem(r1+#2) = v30 |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: r0 = add(r0,#256) |
| ; CHECK-NEXT: r1 = add(r1,#256) |
| ; CHECK-NEXT: v31.cur = vmem(r0+#3) |
| ; CHECK-NEXT: vmem(r1+#3) = v31 |
| ; CHECK-NEXT: } |
| ; CHECK-NEXT: { |
| ; CHECK-NEXT: jump .LBB0_1 |
| ; CHECK-NEXT: } |
| b0: |
| %v0 = icmp eq i32 %a2, 0 |
| br i1 %v0, label %b3, label %b1 |
| |
| b1: ; preds = %b0; reached when %a2 != 0, branches straight into the copy loop |
| br label %b2 |
| |
| b2: ; preds = %b2, %b1; four loads grouped ahead of four stores, both pointers step by 4 vectors, no exit branch |
| %v3 = phi ptr [ %v16, %b2 ], [ %a1, %b1 ] |
| %v4 = phi ptr [ %v11, %b2 ], [ %a0, %b1 ] |
| %v5 = getelementptr inbounds <16 x i32>, ptr %v4, i32 1 |
| %v6 = load <16 x i32>, ptr %v4, align 64 |
| %v7 = getelementptr inbounds <16 x i32>, ptr %v4, i32 2 |
| %v8 = load <16 x i32>, ptr %v5, align 64 |
| %v9 = getelementptr inbounds <16 x i32>, ptr %v4, i32 3 |
| %v10 = load <16 x i32>, ptr %v7, align 64 |
| %v11 = getelementptr inbounds <16 x i32>, ptr %v4, i32 4 |
| %v12 = load <16 x i32>, ptr %v9, align 64 |
| %v13 = getelementptr inbounds <16 x i32>, ptr %v3, i32 1 |
| store <16 x i32> %v6, ptr %v3, align 64 |
| %v14 = getelementptr inbounds <16 x i32>, ptr %v3, i32 2 |
| store <16 x i32> %v8, ptr %v13, align 64 |
| %v15 = getelementptr inbounds <16 x i32>, ptr %v3, i32 3 |
| store <16 x i32> %v10, ptr %v14, align 64 |
| %v16 = getelementptr inbounds <16 x i32>, ptr %v3, i32 4 |
| store <16 x i32> %v12, ptr %v15, align 64 |
| br label %b2 |
| |
| b3: ; preds = %b0; early exit taken when %a2 == 0 |
| ret void |
| } |
| |
| ; Attribute group #0, referenced by @f0: nounwind plus the target features +hvxv65 and +hvx-length64b. |
| attributes #0 = { nounwind "target-features"="+hvxv65,+hvx-length64b" } |