blob: fd8c9c0b348de548cd6133aa724b881b1f20cd6a [file] [edit]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=slp-vectorizer -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-n1 -S %s | FileCheck %s
;
declare void @use(i32) memory(none) nounwind willreturn
define void @test_correctness(ptr noalias %p, ptr noalias %q, i32 %n) {
; CHECK-LABEL: define void @test_correctness(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
; CHECK: [[OUTER_HEADER]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT: br label %[[INNER_HEADER:.*]]
; CHECK: [[INNER_HEADER]]:
; CHECK-NEXT: [[A:%.*]] = phi i32 [ 10, %[[OUTER_HEADER]] ], [ [[A_NEXT:%.*]], %[[INNER_BODY:.*]] ]
; CHECK-NEXT: [[B:%.*]] = phi i32 [ 20, %[[OUTER_HEADER]] ], [ [[B_NEXT:%.*]], %[[INNER_BODY]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_BODY]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[I]], 100
; CHECK-NEXT: br i1 [[COND]], label %[[INNER_BODY]], label %[[OUTER_LATCH]]
; CHECK: [[INNER_BODY]]:
; CHECK-NEXT: [[A_NEXT]] = add nsw i32 [[A]], -1
; CHECK-NEXT: [[B_NEXT]] = add nsw i32 [[B]], -1
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[I64:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[I64N:%.*]] = zext i32 [[I_NEXT]] to i64
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64]]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64N]]
; CHECK-NEXT: store i32 [[A_NEXT]], ptr [[GEP0]], align 4
; CHECK-NEXT: store i32 [[B_NEXT]], ptr [[GEP1]], align 4
; CHECK-NEXT: br label %[[INNER_HEADER]]
; CHECK: [[OUTER_LATCH]]:
; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ [[A]], %[[INNER_HEADER]] ]
; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[B]], %[[INNER_HEADER]] ]
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[TMP3]], [[TMP4]]
; CHECK-NEXT: store i32 [[SUM]], ptr [[Q]], align 4
; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp slt i32 [[J_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[OUTER_COND]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %outer.header
outer.header:
%j = phi i32 [ 0, %entry ], [ %j.next, %outer.latch ]
br label %inner.header
inner.header:
; Constant initial values: tree does NOT reach back into the outer loop.
%a = phi i32 [ 10, %outer.header ], [ %a.next, %inner.body ]
%b = phi i32 [ 20, %outer.header ], [ %b.next, %inner.body ]
%i = phi i32 [ 0, %outer.header ], [ %i.next, %inner.body ]
%cond = icmp ult i32 %i, 100
br i1 %cond, label %inner.body, label %outer.latch
inner.body:
%a.next = add nsw i32 %a, -1
%b.next = add nsw i32 %b, -1
%i.next = add nuw nsw i32 %i, 1
%i64 = zext i32 %i to i64
%i64n = zext i32 %i.next to i64
%gep0 = getelementptr inbounds i32, ptr %p, i64 %i64
%gep1 = getelementptr inbounds i32, ptr %p, i64 %i64n
store i32 %a.next, ptr %gep0, align 4
store i32 %b.next, ptr %gep1, align 4
br label %inner.header
; outer.latch is AFTER inner.body: LCSSA-phi users are the only external
; users of %a/%b. The fix changes their scale from outer_TC (2) to
; inner_TC*outer_TC (101*2=202), raising the extract cost from 8 to 808.
outer.latch:
%v0.lc = phi i32 [ %a, %inner.header ]
%v1.lc = phi i32 [ %b, %inner.header ]
%sum = add i32 %v0.lc, %v1.lc
store i32 %sum, ptr %q, align 4
%j.next = add nuw nsw i32 %j, 1
%outer.cond = icmp slt i32 %j.next, %n
br i1 %outer.cond, label %outer.header, label %exit
exit:
ret void
}
define void @test_ordering_lcssa_first(ptr noalias %p, ptr noalias %q, i32 %n) {
; CHECK-LABEL: define void @test_ordering_lcssa_first(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
; CHECK: [[OUTER_HEADER]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT: br label %[[INNER_HEADER:.*]]
; CHECK: [[INNER_HEADER]]:
; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 10, %[[OUTER_HEADER]] ], [ [[A_NEXT:%.*]], %[[INNER_BODY:.*]] ]
; CHECK-NEXT: [[TMP2:%.*]] = phi i32 [ 20, %[[OUTER_HEADER]] ], [ [[B_NEXT:%.*]], %[[INNER_BODY]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_BODY]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[I]], 100
; CHECK-NEXT: br i1 [[COND]], label %[[INNER_BODY]], label %[[OUTER_LATCH]]
; CHECK: [[INNER_BODY]]:
; CHECK-NEXT: call void @use(i32 [[TMP1]])
; CHECK-NEXT: call void @use(i32 [[TMP2]])
; CHECK-NEXT: [[A_NEXT]] = add nsw i32 [[TMP1]], -1
; CHECK-NEXT: [[B_NEXT]] = add nsw i32 [[TMP2]], -1
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[I64:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[I64N:%.*]] = zext i32 [[I_NEXT]] to i64
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64]]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64N]]
; CHECK-NEXT: store i32 [[A_NEXT]], ptr [[GEP0]], align 4
; CHECK-NEXT: store i32 [[B_NEXT]], ptr [[GEP1]], align 4
; CHECK-NEXT: br label %[[INNER_HEADER]]
; CHECK: [[OUTER_LATCH]]:
; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP1]], %[[INNER_HEADER]] ]
; CHECK-NEXT: [[TMP6:%.*]] = phi i32 [ [[TMP2]], %[[INNER_HEADER]] ]
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[TMP5]], [[TMP6]]
; CHECK-NEXT: store i32 [[SUM]], ptr [[Q]], align 4
; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp slt i32 [[J_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[OUTER_COND]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %outer.header
outer.header:
%j = phi i32 [ 0, %entry ], [ %j.next, %outer.latch ]
br label %inner.header
inner.header:
%a = phi i32 [ 10, %outer.header ], [ %a.next, %inner.body ]
%b = phi i32 [ 20, %outer.header ], [ %b.next, %inner.body ]
%i = phi i32 [ 0, %outer.header ], [ %i.next, %inner.body ]
%cond = icmp ult i32 %i, 100
br i1 %cond, label %inner.body, label %outer.latch
inner.body:
call void @use(i32 %a)
call void @use(i32 %b)
%a.next = add nsw i32 %a, -1
%b.next = add nsw i32 %b, -1
%i.next = add nuw nsw i32 %i, 1
%i64 = zext i32 %i to i64
%i64n = zext i32 %i.next to i64
%gep0 = getelementptr inbounds i32, ptr %p, i64 %i64
%gep1 = getelementptr inbounds i32, ptr %p, i64 %i64n
store i32 %a.next, ptr %gep0, align 4
store i32 %b.next, ptr %gep1, align 4
br label %inner.header
outer.latch:
%v0.lc = phi i32 [ %a, %inner.header ]
%v1.lc = phi i32 [ %b, %inner.header ]
%sum = add i32 %v0.lc, %v1.lc
store i32 %sum, ptr %q, align 4
%j.next = add nuw nsw i32 %j, 1
%outer.cond = icmp slt i32 %j.next, %n
br i1 %outer.cond, label %outer.header, label %exit
exit:
ret void
}
define void @test_ordering_inloop_first(ptr noalias %p, ptr noalias %q, i32 %n) {
; CHECK-LABEL: define void @test_ordering_inloop_first(
; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]], i32 [[N:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[OUTER_HEADER:.*]]
; CHECK: [[OUTER_HEADER]]:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
; CHECK-NEXT: br label %[[INNER_HEADER:.*]]
; CHECK: [[INNER_HEADER]]:
; CHECK-NEXT: [[A:%.*]] = phi i32 [ 10, %[[OUTER_HEADER]] ], [ [[A_NEXT:%.*]], %[[INNER_BODY:.*]] ]
; CHECK-NEXT: [[B:%.*]] = phi i32 [ 20, %[[OUTER_HEADER]] ], [ [[B_NEXT:%.*]], %[[INNER_BODY]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[OUTER_HEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_BODY]] ]
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[I]], 100
; CHECK-NEXT: br i1 [[COND]], label %[[INNER_BODY]], label %[[OUTER_LATCH]]
; CHECK: [[OUTER_LATCH]]:
; CHECK-NEXT: [[V0_LC:%.*]] = phi i32 [ [[A]], %[[INNER_HEADER]] ]
; CHECK-NEXT: [[V1_LC:%.*]] = phi i32 [ [[B]], %[[INNER_HEADER]] ]
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[V0_LC]], [[V1_LC]]
; CHECK-NEXT: store i32 [[SUM]], ptr [[Q]], align 4
; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i32 [[J]], 1
; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp slt i32 [[J_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[OUTER_COND]], label %[[OUTER_HEADER]], label %[[EXIT:.*]]
; CHECK: [[INNER_BODY]]:
; CHECK-NEXT: call void @use(i32 [[A]])
; CHECK-NEXT: call void @use(i32 [[B]])
; CHECK-NEXT: [[A_NEXT]] = add nsw i32 [[A]], -1
; CHECK-NEXT: [[B_NEXT]] = add nsw i32 [[B]], -1
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1
; CHECK-NEXT: [[I64:%.*]] = zext i32 [[I]] to i64
; CHECK-NEXT: [[I64N:%.*]] = zext i32 [[I_NEXT]] to i64
; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64]]
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[I64N]]
; CHECK-NEXT: store i32 [[A_NEXT]], ptr [[GEP0]], align 4
; CHECK-NEXT: store i32 [[B_NEXT]], ptr [[GEP1]], align 4
; CHECK-NEXT: br label %[[INNER_HEADER]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %outer.header
outer.header:
%j = phi i32 [ 0, %entry ], [ %j.next, %outer.latch ]
br label %inner.header
inner.header:
%a = phi i32 [ 10, %outer.header ], [ %a.next, %inner.body ]
%b = phi i32 [ 20, %outer.header ], [ %b.next, %inner.body ]
%i = phi i32 [ 0, %outer.header ], [ %i.next, %inner.body ]
%cond = icmp ult i32 %i, 100
br i1 %cond, label %inner.body, label %outer.latch
; outer.latch placed BEFORE inner.body: its LCSSA phis of %a/%b are
; registered as forward references when parsed, then inner.body @use
; calls are prepended on top => in-loop users appear first.
outer.latch:
%v0.lc = phi i32 [ %a, %inner.header ]
%v1.lc = phi i32 [ %b, %inner.header ]
%sum = add i32 %v0.lc, %v1.lc
store i32 %sum, ptr %q, align 4
%j.next = add nuw nsw i32 %j, 1
%outer.cond = icmp slt i32 %j.next, %n
br i1 %outer.cond, label %outer.header, label %exit
inner.body:
call void @use(i32 %a)
call void @use(i32 %b)
%a.next = add nsw i32 %a, -1
%b.next = add nsw i32 %b, -1
%i.next = add nuw nsw i32 %i, 1
%i64 = zext i32 %i to i64
%i64n = zext i32 %i.next to i64
%gep0 = getelementptr inbounds i32, ptr %p, i64 %i64
%gep1 = getelementptr inbounds i32, ptr %p, i64 %i64n
store i32 %a.next, ptr %gep0, align 4
store i32 %b.next, ptr %gep1, align 4
br label %inner.header
exit:
ret void
}