| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes=slp-vectorizer -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-n1 -S %s | FileCheck %s |
| ; |
| ; External callee with no memory effects. The two test_ordering_* functions |
| ; pass %a and %b to it so those values have users inside the inner loop. |
| declare void @use(i32) memory(none) nounwind willreturn |
| |
| ; test_correctness: the inner loop decrements %a and %b (starting at 10 and |
| ; 20) and stores the results to the adjacent i32 slots p[i] and p[i+1]. The |
| ; only users of %a/%b outside the inner loop are the single-input phis in |
| ; outer.latch. The CHECK lines expect the IR to remain scalar. |
| define void @test_correctness(ptr noalias %p, ptr noalias %q, i32 %n) { |
| ; CHECK-LABEL: define void @test_correctness( |
| ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] |
| ; CHECK: [[OUTER_HEADER]]: |
| ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] |
| ; CHECK-NEXT: br label %[[INNER_HEADER:.*]] |
| ; CHECK: [[INNER_HEADER]]: |
| ; CHECK-NEXT: [[A:%.*]] = phi i32 [ 10, %[[OUTER_HEADER]] ], [ [[A_NEXT:%.*]], %[[INNER_BODY:.*]] ] |
| ; CHECK-NEXT: [[B:%.*]] = phi i32 [ 20, %[[OUTER_HEADER]] ], [ [[B_NEXT:%.*]], %[[INNER_BODY]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_BODY]] ] |
| ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[I]], 100 |
| ; CHECK-NEXT: br i1 [[COND]], label %[[INNER_BODY]], label %[[OUTER_LATCH]] |
| ; CHECK: [[INNER_BODY]]: |
| ; CHECK-NEXT: [[A_NEXT]] = add nsw i32 [[A]], -1 |
| ; CHECK-NEXT: [[B_NEXT]] = add nsw i32 [[B]], -1 |
| ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 |
| ; CHECK-NEXT: [[I64:%.*]] = zext i32 [[I]] to i64 |
| ; CHECK-NEXT: [[I64N:%.*]] = zext i32 [[I_NEXT]] to i64 |
| ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64]] |
| ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64N]] |
| ; CHECK-NEXT: store i32 [[A_NEXT]], ptr [[GEP0]], align 4 |
| ; CHECK-NEXT: store i32 [[B_NEXT]], ptr [[GEP1]], align 4 |
| ; CHECK-NEXT: br label %[[INNER_HEADER]] |
| ; CHECK: [[OUTER_LATCH]]: |
| ; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[A]], %[[INNER_HEADER]] ] |
| ; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[B]], %[[INNER_HEADER]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = add i32 [[TMP3]], [[TMP4]] |
| ; CHECK-NEXT: store i32 [[SUM]], ptr [[Q]], align 4 |
| ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 |
| ; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp slt i32 [[J_NEXT]], [[N]] |
| ; CHECK-NEXT: br i1 [[OUTER_COND]], label %[[OUTER_HEADER]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %outer.header |
| |
| outer.header: |
| %j = phi i32 [ 0, %entry ], [ %j.next, %outer.latch ] |
| br label %inner.header |
| |
| inner.header: |
| ; The phis start from constants (10, 20, 0), so the tree does NOT reach back |
| ; into the outer loop. |
| %a = phi i32 [ 10, %outer.header ], [ %a.next, %inner.body ] |
| %b = phi i32 [ 20, %outer.header ], [ %b.next, %inner.body ] |
| %i = phi i32 [ 0, %outer.header ], [ %i.next, %inner.body ] |
| %cond = icmp ult i32 %i, 100 |
| br i1 %cond, label %inner.body, label %outer.latch |
| |
| inner.body: |
| %a.next = add nsw i32 %a, -1 |
| %b.next = add nsw i32 %b, -1 |
| %i.next = add nuw nsw i32 %i, 1 |
| %i64 = zext i32 %i to i64 |
| %i64n = zext i32 %i.next to i64 |
| %gep0 = getelementptr inbounds i32, ptr %p, i64 %i64 |
| %gep1 = getelementptr inbounds i32, ptr %p, i64 %i64n |
| store i32 %a.next, ptr %gep0, align 4 |
| store i32 %b.next, ptr %gep1, align 4 |
| br label %inner.header |
| |
| ; outer.latch is placed AFTER inner.body: the LCSSA phis below are the only |
| ; external users of %a/%b. Per the original note, the fix under test changes |
| ; their scale from outer_TC (2) to inner_TC*outer_TC (101*2=202), raising the |
| ; extract cost from 8 to 808. |
| outer.latch: |
| %v0.lc = phi i32 [ %a, %inner.header ] |
| %v1.lc = phi i32 [ %b, %inner.header ] |
| %sum = add i32 %v0.lc, %v1.lc |
| store i32 %sum, ptr %q, align 4 |
| %j.next = add nuw nsw i32 %j, 1 |
| %outer.cond = icmp slt i32 %j.next, %n |
| br i1 %outer.cond, label %outer.header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; test_ordering_lcssa_first: same loop nest as test_correctness, but |
| ; inner.body also passes %a and %b to @use, so they have users inside the |
| ; inner loop in addition to the LCSSA phis in outer.latch. The CHECK lines |
| ; expect the IR to remain scalar. |
| define void @test_ordering_lcssa_first(ptr noalias %p, ptr noalias %q, i32 %n) { |
| ; CHECK-LABEL: define void @test_ordering_lcssa_first( |
| ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] |
| ; CHECK: [[OUTER_HEADER]]: |
| ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] |
| ; CHECK-NEXT: br label %[[INNER_HEADER:.*]] |
| ; CHECK: [[INNER_HEADER]]: |
| ; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 10, %[[OUTER_HEADER]] ], [ [[A_NEXT:%.*]], %[[INNER_BODY:.*]] ] |
| ; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ 20, %[[OUTER_HEADER]] ], [ [[B_NEXT:%.*]], %[[INNER_BODY]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_BODY]] ] |
| ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[I]], 100 |
| ; CHECK-NEXT: br i1 [[COND]], label %[[INNER_BODY]], label %[[OUTER_LATCH]] |
| ; CHECK: [[INNER_BODY]]: |
| ; CHECK-NEXT: call void @use(i32 [[TMP1]]) |
| ; CHECK-NEXT: call void @use(i32 [[TMP2]]) |
| ; CHECK-NEXT: [[A_NEXT]] = add nsw i32 [[TMP1]], -1 |
| ; CHECK-NEXT: [[B_NEXT]] = add nsw i32 [[TMP2]], -1 |
| ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 |
| ; CHECK-NEXT: [[I64:%.*]] = zext i32 [[I]] to i64 |
| ; CHECK-NEXT: [[I64N:%.*]] = zext i32 [[I_NEXT]] to i64 |
| ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64]] |
| ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64N]] |
| ; CHECK-NEXT: store i32 [[A_NEXT]], ptr [[GEP0]], align 4 |
| ; CHECK-NEXT: store i32 [[B_NEXT]], ptr [[GEP1]], align 4 |
| ; CHECK-NEXT: br label %[[INNER_HEADER]] |
| ; CHECK: [[OUTER_LATCH]]: |
| ; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP1]], %[[INNER_HEADER]] ] |
| ; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP2]], %[[INNER_HEADER]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = add i32 [[TMP5]], [[TMP6]] |
| ; CHECK-NEXT: store i32 [[SUM]], ptr [[Q]], align 4 |
| ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 |
| ; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp slt i32 [[J_NEXT]], [[N]] |
| ; CHECK-NEXT: br i1 [[OUTER_COND]], label %[[OUTER_HEADER]], label %[[EXIT:.*]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %outer.header |
| |
| outer.header: |
| %j = phi i32 [ 0, %entry ], [ %j.next, %outer.latch ] |
| br label %inner.header |
| |
| inner.header: |
| %a = phi i32 [ 10, %outer.header ], [ %a.next, %inner.body ] |
| %b = phi i32 [ 20, %outer.header ], [ %b.next, %inner.body ] |
| %i = phi i32 [ 0, %outer.header ], [ %i.next, %inner.body ] |
| %cond = icmp ult i32 %i, 100 |
| br i1 %cond, label %inner.body, label %outer.latch |
| |
| inner.body: |
| ; In-loop users of %a/%b, in addition to the adds that feed the stores. |
| call void @use(i32 %a) |
| call void @use(i32 %b) |
| %a.next = add nsw i32 %a, -1 |
| %b.next = add nsw i32 %b, -1 |
| %i.next = add nuw nsw i32 %i, 1 |
| %i64 = zext i32 %i to i64 |
| %i64n = zext i32 %i.next to i64 |
| %gep0 = getelementptr inbounds i32, ptr %p, i64 %i64 |
| %gep1 = getelementptr inbounds i32, ptr %p, i64 %i64n |
| store i32 %a.next, ptr %gep0, align 4 |
| store i32 %b.next, ptr %gep1, align 4 |
| br label %inner.header |
| |
| ; outer.latch follows inner.body in the text, so its phis are parsed after |
| ; the @use calls. NOTE(review): presumably this puts the LCSSA phi users |
| ; first in the use lists of %a/%b, as the function name suggests -- confirm. |
| outer.latch: |
| %v0.lc = phi i32 [ %a, %inner.header ] |
| %v1.lc = phi i32 [ %b, %inner.header ] |
| %sum = add i32 %v0.lc, %v1.lc |
| store i32 %sum, ptr %q, align 4 |
| %j.next = add nuw nsw i32 %j, 1 |
| %outer.cond = icmp slt i32 %j.next, %n |
| br i1 %outer.cond, label %outer.header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; test_ordering_inloop_first: same instructions as |
| ; test_ordering_lcssa_first, but outer.latch is written before inner.body so |
| ; that the users of %a/%b are recorded in the opposite textual order. The |
| ; CHECK lines expect the IR to remain scalar, with the blocks in source order. |
| define void @test_ordering_inloop_first(ptr noalias %p, ptr noalias %q, i32 %n) { |
| ; CHECK-LABEL: define void @test_ordering_inloop_first( |
| ; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR1]] { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] |
| ; CHECK: [[OUTER_HEADER]]: |
| ; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] |
| ; CHECK-NEXT: br label %[[INNER_HEADER:.*]] |
| ; CHECK: [[INNER_HEADER]]: |
| ; CHECK-NEXT: [[A:%.*]] = phi i32 [ 10, %[[OUTER_HEADER]] ], [ [[A_NEXT:%.*]], %[[INNER_BODY:.*]] ] |
| ; CHECK-NEXT: [[B:%.*]] = phi i32 [ 20, %[[OUTER_HEADER]] ], [ [[B_NEXT:%.*]], %[[INNER_BODY]] ] |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_BODY]] ] |
| ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[I]], 100 |
| ; CHECK-NEXT: br i1 [[COND]], label %[[INNER_BODY]], label %[[OUTER_LATCH]] |
| ; CHECK: [[OUTER_LATCH]]: |
| ; CHECK-NEXT: [[V0_LC:%.*]] = phi i32 [ [[A]], %[[INNER_HEADER]] ] |
| ; CHECK-NEXT: [[V1_LC:%.*]] = phi i32 [ [[B]], %[[INNER_HEADER]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = add i32 [[V0_LC]], [[V1_LC]] |
| ; CHECK-NEXT: store i32 [[SUM]], ptr [[Q]], align 4 |
| ; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1 |
| ; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp slt i32 [[J_NEXT]], [[N]] |
| ; CHECK-NEXT: br i1 [[OUTER_COND]], label %[[OUTER_HEADER]], label %[[EXIT:.*]] |
| ; CHECK: [[INNER_BODY]]: |
| ; CHECK-NEXT: call void @use(i32 [[A]]) |
| ; CHECK-NEXT: call void @use(i32 [[B]]) |
| ; CHECK-NEXT: [[A_NEXT]] = add nsw i32 [[A]], -1 |
| ; CHECK-NEXT: [[B_NEXT]] = add nsw i32 [[B]], -1 |
| ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 |
| ; CHECK-NEXT: [[I64:%.*]] = zext i32 [[I]] to i64 |
| ; CHECK-NEXT: [[I64N:%.*]] = zext i32 [[I_NEXT]] to i64 |
| ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64]] |
| ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64N]] |
| ; CHECK-NEXT: store i32 [[A_NEXT]], ptr [[GEP0]], align 4 |
| ; CHECK-NEXT: store i32 [[B_NEXT]], ptr [[GEP1]], align 4 |
| ; CHECK-NEXT: br label %[[INNER_HEADER]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %outer.header |
| |
| outer.header: |
| %j = phi i32 [ 0, %entry ], [ %j.next, %outer.latch ] |
| br label %inner.header |
| |
| inner.header: |
| %a = phi i32 [ 10, %outer.header ], [ %a.next, %inner.body ] |
| %b = phi i32 [ 20, %outer.header ], [ %b.next, %inner.body ] |
| %i = phi i32 [ 0, %outer.header ], [ %i.next, %inner.body ] |
| %cond = icmp ult i32 %i, 100 |
| br i1 %cond, label %inner.body, label %outer.latch |
| |
| ; outer.latch is placed BEFORE inner.body: its LCSSA phis of %a/%b are parsed |
| ; first, and the @use calls in inner.body are parsed afterwards. Per the |
| ; original note, the later uses are prepended to the use lists, so the in-loop |
| ; users appear first. NOTE(review): confirm against the parser's use-list order. |
| outer.latch: |
| %v0.lc = phi i32 [ %a, %inner.header ] |
| %v1.lc = phi i32 [ %b, %inner.header ] |
| %sum = add i32 %v0.lc, %v1.lc |
| store i32 %sum, ptr %q, align 4 |
| %j.next = add nuw nsw i32 %j, 1 |
| %outer.cond = icmp slt i32 %j.next, %n |
| br i1 %outer.cond, label %outer.header, label %exit |
| |
| inner.body: |
| call void @use(i32 %a) |
| call void @use(i32 %b) |
| %a.next = add nsw i32 %a, -1 |
| %b.next = add nsw i32 %b, -1 |
| %i.next = add nuw nsw i32 %i, 1 |
| %i64 = zext i32 %i to i64 |
| %i64n = zext i32 %i.next to i64 |
| %gep0 = getelementptr inbounds i32, ptr %p, i64 %i64 |
| %gep1 = getelementptr inbounds i32, ptr %p, i64 %i64n |
| store i32 %a.next, ptr %gep0, align 4 |
| store i32 %b.next, ptr %gep1, align 4 |
| br label %inner.header |
| |
| exit: |
| ret void |
| } |