| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -mtriple=amdgcn-unknown-amdhsa < %s -S -passes=loop-unroll | FileCheck %s |
| ; Checks that loops with expensive (runtime-computed) trip counts are runtime-unrolled when the llvm.loop.unroll.enable metadata is present. |
| |
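| ; @complex_loop_unroll declares no function attributes; its loop advances by a runtime %step and keeps iterating while the incremented induction variable is signed-less-than %n. |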
| ; Single-block loop whose trip count depends on the runtime values of |
| ; %input_offset, %step and %n. The autogenerated assertions below expect the |
| ; trip count to be expanded in the entry block (smax/umin/umax/udiv), a main |
| ; loop unrolled by 8, and an epilogue loop that runs the leftover iterations. |
| define dso_local void @complex_loop_unroll(i64 noundef %input_offset, i64 noundef %step, i64 noundef %n) { |
| ; CHECK-LABEL: define dso_local void @complex_loop_unroll( |
| ; CHECK-SAME: i64 noundef [[INPUT_OFFSET:%.*]], i64 noundef [[STEP:%.*]], i64 noundef [[N:%.*]]) { |
| ; CHECK-NEXT: [[FOR_BODY_LR_PH:.*]]: |
| ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[STEP]], [[INPUT_OFFSET]] |
| ; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 [[TMP3]]) |
| ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[SMAX]], [[TMP3]] |
| ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP4]], i64 1) |
| ; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[SMAX]], [[UMIN]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP5]], [[TMP3]] |
| ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[STEP]], i64 1) |
| ; CHECK-NEXT: [[TMP7:%.*]] = udiv i64 [[TMP6]], [[UMAX]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[UMIN]], [[TMP7]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 1 |
| ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP9]], 7 |
| ; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i64 [[TMP8]], 7 |
| ; CHECK-NEXT: br i1 [[TMP12]], label %[[FOR_END14_LOOPEXIT1:.*]], label %[[FOR_BODY_LR_PH_NEW:.*]] |
| ; CHECK: [[FOR_BODY_LR_PH_NEW]]: |
| ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP9]], [[XTRAITER]] |
| ; CHECK-NEXT: br label %[[FOR_BODY1:.*]] |
| ; CHECK: [[FOR_BODY1]]: |
| ; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH_NEW]] ], [ [[ADD_7:%.*]], %[[FOR_BODY1]] ] |
| ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INPUT_OFFSET]], %[[FOR_BODY_LR_PH_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY1]] ] |
| ; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[FOR_BODY1]] ] |
| ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[TMP11]], [[INDVARS_IV]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nsw i64 [[INDVARS_IV]], [[STEP]] |
| ; CHECK-NEXT: [[ADD_1:%.*]] = add i64 [[ADD]], [[INDVARS_IV_NEXT]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nsw i64 [[INDVARS_IV_NEXT]], [[STEP]] |
| ; CHECK-NEXT: [[ADD_2:%.*]] = add i64 [[ADD_1]], [[INDVARS_IV_NEXT_1]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_1]], [[STEP]] |
| ; CHECK-NEXT: [[ADD_3:%.*]] = add i64 [[ADD_2]], [[INDVARS_IV_NEXT_2]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_2]], [[STEP]] |
| ; CHECK-NEXT: [[ADD_4:%.*]] = add i64 [[ADD_3]], [[INDVARS_IV_NEXT_3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_3]], [[STEP]] |
| ; CHECK-NEXT: [[ADD_5:%.*]] = add i64 [[ADD_4]], [[INDVARS_IV_NEXT_4]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_4]], [[STEP]] |
| ; CHECK-NEXT: [[ADD_6:%.*]] = add i64 [[ADD_5]], [[INDVARS_IV_NEXT_5]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_9:%.*]] = add nsw i64 [[INDVARS_IV_NEXT_5]], [[STEP]] |
| ; CHECK-NEXT: [[ADD_7]] = add i64 [[ADD_6]], [[INDVARS_IV_NEXT_9]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nsw i64 [[INDVARS_IV_NEXT_9]], [[STEP]] |
| ; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 |
| ; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp ne i64 [[NITER_NEXT_7]], [[UNROLL_ITER]] |
| ; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label %[[FOR_BODY1]], label %[[FOR_END14_LOOPEXIT_UNR_LCSSA1:.*]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: [[FOR_END14_LOOPEXIT_UNR_LCSSA1]]: |
| ; CHECK-NEXT: [[DOTUNR:%.*]] = phi i64 [ [[ADD_7]], %[[FOR_BODY1]] ] |
| ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], %[[FOR_BODY1]] ] |
| ; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 |
| ; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[FOR_END14_LOOPEXIT1]], label %[[FOR_END14_LOOPEXIT:.*]] |
| ; CHECK: [[FOR_END14_LOOPEXIT1]]: |
| ; CHECK-NEXT: [[DOTEPIL_INIT:%.*]] = phi i64 [ 0, %[[FOR_BODY_LR_PH]] ], [ [[DOTUNR]], %[[FOR_END14_LOOPEXIT_UNR_LCSSA1]] ] |
| ; CHECK-NEXT: [[INDVARS_IV_EPIL_INIT:%.*]] = phi i64 [ [[INPUT_OFFSET]], %[[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_UNR]], %[[FOR_END14_LOOPEXIT_UNR_LCSSA1]] ] |
| ; CHECK-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0 |
| ; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]]) |
| ; CHECK-NEXT: br label %[[FOR_BODY_EPIL:.*]] |
| ; CHECK: [[FOR_BODY_EPIL]]: |
| ; CHECK-NEXT: [[TMP13:%.*]] = phi i64 [ [[DOTEPIL_INIT]], %[[FOR_END14_LOOPEXIT1]] ], [ [[ADD_EPIL:%.*]], %[[FOR_BODY_EPIL]] ] |
| ; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_EPIL_INIT]], %[[FOR_END14_LOOPEXIT1]] ], [ [[INDVARS_IV_NEXT_EPIL:%.*]], %[[FOR_BODY_EPIL]] ] |
| ; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[FOR_END14_LOOPEXIT1]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[FOR_BODY_EPIL]] ] |
| ; CHECK-NEXT: [[ADD_EPIL]] = add i64 [[TMP13]], [[INDVARS_IV_EPIL]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add nsw i64 [[INDVARS_IV_EPIL]], [[STEP]] |
| ; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT_EPIL]], [[N]] |
| ; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 |
| ; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] |
| ; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[FOR_BODY_EPIL]], label %[[FOR_END14_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK: [[FOR_END14_LOOPEXIT_UNR_LCSSA]]: |
| ; CHECK-NEXT: br label %[[FOR_END14_LOOPEXIT]] |
| ; CHECK: [[FOR_END14_LOOPEXIT]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| ; Running sum of the induction variable; it has no use outside the loop. |
| %3 = phi i64 [ 0, %entry ], [ %add, %for.body ] |
| ; Induction variable: starts at %input_offset and advances by %step per iteration. |
| %indvars.iv = phi i64 [ %input_offset, %entry ], [ %indvars.iv.next, %for.body ] |
| %add = add i64 %3, %indvars.iv |
| %indvars.iv.next = add nsw i64 %indvars.iv, %step |
| ; Signed exit test on the already-incremented induction variable. |
| %cmp = icmp slt i64 %indvars.iv.next, %n |
| ; The back edge carries loop ID !0, which holds the unroll.enable hint. |
| br i1 %cmp, label %for.body, label %for.end14, !llvm.loop !0 |
| |
| for.end14: ; preds = %for.body |
| ret void |
| } |
| |
| ; Loop ID referenced by the back-edge branch of @complex_loop_unroll: a |
| ; self-referential distinct node listing the two loop hints below. |
| !0 = distinct !{!0, !1, !2} |
| !1 = !{!"llvm.loop.mustprogress"} |
| ; Unroll request for the loop; this is the hint the test exercises. |
| !2 = !{!"llvm.loop.unroll.enable"} |
| |
| ;. |
| ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} |
| ; CHECK: [[META1]] = !{!"llvm.loop.mustprogress"} |
| ; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"} |
| ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]]} |
| ;. |