| ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4 |
| ; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s |
| |
| target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" |
| |
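| ; Count-up loop stepping by vscale with a signed-less-than exit. The |
| ; backedge-taken count should be computable as ((-1 + %n) /u vscale). |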
| define void @vscale_slt(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_slt' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_slt |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %vscale |
| ; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_slt |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 1073741822 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %add = add nsw i32 %i.05, %vscale |
| %cmp = icmp slt i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
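| ; As above, but with an unsigned-less-than exit. The backedge-taken count is |
| ; the same; only the constant max backedge-taken count differs. |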
| define void @vscale_ult(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_ult' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_ult |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %vscale |
| ; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_ult |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483646 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %add = add nsw i32 %i.05, %vscale |
| %cmp = icmp ult i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
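| ; Unsigned-less-or-equal exit. The backedge-taken count picks up a umax: |
| ; (((-1 + vscale)<nsw> umax %n) /u vscale). |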
| define void @vscale_ule(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_ule' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_ule |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * (((-1 + vscale)<nsw> umax %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * (((-1 + vscale)<nsw> umax %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %vscale |
| ; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + (((-1 + vscale)<nsw> umax %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_ule |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-1 + vscale)<nsw> umax %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483647 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-1 + vscale)<nsw> umax %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %add = add nsw i32 %i.05, %vscale |
| %cmp = icmp ule i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
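| ; Not-equal exit. The backedge-taken count should be |
| ; (((-1 * vscale)<nsw> + %n) /u vscale). |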
| define void @vscale_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_ne' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_ne |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,vscale}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,-2147483648) Exits: (vscale * (((-1 * vscale)<nsw> + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * (((-1 * vscale)<nsw> + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %vscale |
| ; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + (((-1 * vscale)<nsw> + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_ne |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-1 * vscale)<nsw> + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483647 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-1 * vscale)<nsw> + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %add = add nsw i32 %i.05, %vscale |
| %cmp = icmp ne i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
| |
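| ; The next two cases step by 4 * vscale, matching the element count of the |
| ; <vscale x 4 x i32> accesses. This one uses a signed-less-than exit. |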
| define void @vscalex4_slt(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscalex4_slt' |
| ; CHECK-NEXT: Classifying expressions for: @vscalex4_slt |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %VF = mul i32 %vscale, 4 |
| ; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %VF |
| ; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscalex4_slt |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 268435454 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %VF = mul i32 %vscale, 4 |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %add = add nsw i32 %i.05, %VF |
| %cmp = icmp slt i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
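| ; As above, but with an unsigned-less-than exit. |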
| define void @vscalex4_ult(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscalex4_ult' |
| ; CHECK-NEXT: Classifying expressions for: @vscalex4_ult |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %VF = mul i32 %vscale, 4 |
| ; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %VF |
| ; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscalex4_ult |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870910 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %VF = mul i32 %vscale, 4 |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %add = add nsw i32 %i.05, %VF |
| %cmp = icmp ult i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
| |
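| ; The following cases use VP (vector predication) load/store intrinsics. |
| ; Here the explicit vector length is simply %VF and the IV still steps by |
| ; %VF, so the backedge-taken count remains computable. |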
| define void @vscale_slt_with_vp_plain(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_slt_with_vp_plain' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_slt_with_vp_plain |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %VF = mul i32 %vscale, 4 |
| ; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %VF |
| ; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_with_vp_plain |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 268435454 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %VF = mul i32 %vscale, 4 |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| |
| %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF) |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF) |
| |
| %add = add nsw i32 %i.05, %VF |
| %cmp = icmp slt i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
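| ; The explicit vector length is capped with umin(%VF, elements remaining), |
| ; but the IV still advances by the full %VF each iteration, so the |
| ; backedge-taken count is unchanged from the previous case. |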
| define void @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_slt_with_vp_umin' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_slt_with_vp_umin |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %VF = mul i32 %vscale, 4 |
| ; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %left = sub i32 %n, %i.05 |
| ; CHECK-NEXT: --> {%n,+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left) |
| ; CHECK-NEXT: --> ((4 * vscale)<nuw><nsw> umin {%n,+,(-4 * vscale)<nsw>}<nw><%for.body>) U: [0,4097) S: [0,4097) Exits: (((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) umin (4 * vscale)<nuw><nsw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %VF |
| ; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_with_vp_umin |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 268435454 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %VF = mul i32 %vscale, 4 |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| |
| %left = sub i32 %n, %i.05 |
| %VF.capped = call i32 @llvm.umin(i32 %VF, i32 %left) |
| |
| %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped) |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped) |
| |
| %add = add nsw i32 %i.05, %VF |
| %cmp = icmp slt i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
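| ; Here the IV advances by the capped length %VF.capped rather than %VF, so |
| ; SCEV cannot compute a backedge-taken count for the loop. |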
| define void @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_slt_with_vp_umin2' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_slt_with_vp_umin2 |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %VF = mul i32 %vscale, 4 |
| ; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> %i.05 U: [0,-2147483648) S: [0,-2147483648) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> ((4 * %i.05) + %A) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Variant } |
| ; CHECK-NEXT: %left = sub i32 %n, %i.05 |
| ; CHECK-NEXT: --> ((-1 * %i.05)<nsw> + %n) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %for.body: Variant } |
| ; CHECK-NEXT: %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left) |
| ; CHECK-NEXT: --> (((-1 * %i.05)<nsw> + %n) umin (4 * vscale)<nuw><nsw>) U: [0,4097) S: [0,4097) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant } |
| ; CHECK-NEXT: %add = add nsw i32 %i.05, %VF.capped |
| ; CHECK-NEXT: --> ((((-1 * %i.05)<nsw> + %n) umin (4 * vscale)<nuw><nsw>) + %i.05)<nuw><nsw> U: [0,-2147483648) S: [0,-2147483648) Exits: <<Unknown>> LoopDispositions: { %for.body: Variant } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_with_vp_umin2 |
| ; CHECK-NEXT: Loop %for.body: Unpredictable backedge-taken count. |
| ; CHECK-NEXT: Loop %for.body: Unpredictable constant max backedge-taken count. |
| ; CHECK-NEXT: Loop %for.body: Unpredictable symbolic max backedge-taken count. |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %VF = mul i32 %vscale, 4 |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| |
| %left = sub i32 %n, %i.05 |
| %VF.capped = call i32 @llvm.umin(i32 %VF, i32 %left) |
| |
| %0 = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped) |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %inc, ptr align 4 %arrayidx, <vscale x 4 x i1> splat (i1 true), i32 %VF.capped) |
| |
| %add = add nsw i32 %i.05, %VF.capped |
| %cmp = icmp slt i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
| ; The next two cases check whether we can infer wrap flags on the IV of a |
| ; count-up loop using vscale strides when the adds in the IR carry none. |
| ; vscale is a power of two and these are finite loops by assumption |
| ; (mustprogress). |
| |
| define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_slt_noflags' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_slt_noflags |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,vscale}<%for.body> U: full-set S: full-set Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add i32 %i.05, %vscale |
| ; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_noflags |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 1073741822 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %add = add i32 %i.05, %vscale |
| %cmp = icmp slt i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
| define void @vscalex4_ult_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscalex4_ult_noflags' |
| ; CHECK-NEXT: Classifying expressions for: @vscalex4_ult_noflags |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %VF = mul i32 %vscale, 4 |
| ; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097) |
| ; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| ; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| ; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %add = add i32 %i.05, %VF |
| ; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: [8,-3) S: [-2147483648,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscalex4_ult_noflags |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870910 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is ((-1 + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %VF = mul i32 %vscale, 4 |
| %cmp4 = icmp sgt i32 %n, 0 |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05 |
| %0 = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %0, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %add = add i32 %i.05, %VF |
| %cmp = icmp ult i32 %add, %n |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
| ; The next two cases check whether we can infer wrap flags on the IV of a |
| ; count-down loop using vscale strides. |
| |
| define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscale_countdown_ne' |
| ; CHECK-NEXT: Classifying expressions for: @vscale_countdown_ne |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %start = sub i32 %n, %vscale |
| ; CHECK-NEXT: --> ((-1 * vscale)<nsw> + %n) U: full-set S: full-set |
| ; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ] |
| ; CHECK-NEXT: --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-1 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv |
| ; CHECK-NEXT: --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale)<nsw> + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %sub = sub i32 %iv, %vscale |
| ; CHECK-NEXT: --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-2 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)) + %n) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscale_countdown_ne |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 2147483647 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-2 * vscale)<nsw> + %n) /u vscale) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %cmp4 = icmp sgt i32 %n, 0 |
| %start = sub i32 %n, %vscale |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv |
| %ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %sub = sub i32 %iv, %vscale |
| %cmp = icmp ne i32 %sub, 0 |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |
| |
| define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) { |
| ; CHECK-LABEL: 'vscalex4_countdown_ne' |
| ; CHECK-NEXT: Classifying expressions for: @vscalex4_countdown_ne |
| ; CHECK-NEXT: %vscale = call i32 @llvm.vscale.i32() |
| ; CHECK-NEXT: --> vscale U: [2,1025) S: [2,1025) |
| ; CHECK-NEXT: %VF = shl i32 %vscale, 2 |
| ; CHECK-NEXT: --> (4 * vscale)<nuw><nsw> U: [8,4097) S: [8,4097) |
| ; CHECK-NEXT: %start = sub i32 %n, %VF |
| ; CHECK-NEXT: --> ((-4 * vscale)<nsw> + %n) U: full-set S: full-set |
| ; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ] |
| ; CHECK-NEXT: --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-4 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv |
| ; CHECK-NEXT: --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)))) + %A) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: %sub = sub i32 %iv, %VF |
| ; CHECK-NEXT: --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-8 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)) + %n) LoopDispositions: { %for.body: Computable } |
| ; CHECK-NEXT: Determining loop execution counts for: @vscalex4_countdown_ne |
| ; CHECK-NEXT: Loop %for.body: backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is i32 536870911 |
| ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>) |
| ; CHECK-NEXT: Loop %for.body: Trip multiple is 1 |
| ; |
| entry: |
| %vscale = call i32 @llvm.vscale.i32() |
| %VF = shl i32 %vscale, 2 |
| %cmp4 = icmp sgt i32 %n, 0 |
| %start = sub i32 %n, %VF |
| br i1 %cmp4, label %for.body, label %for.end |
| |
| for.body: ; preds = %entry, %for.body |
| %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ] |
| %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv |
| %ld = load <vscale x 4 x i32>, ptr %arrayidx, align 4 |
| %inc = add nsw <vscale x 4 x i32> %ld, splat (i32 1) |
| store <vscale x 4 x i32> %inc, ptr %arrayidx, align 4 |
| %sub = sub i32 %iv, %VF |
| %cmp = icmp ne i32 %sub, 0 |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body, %entry |
| ret void |
| } |