| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes="default<O3>" -S %s | FileCheck %s |
| |
| ; Target configuration: x86_64 Linux. @test additionally selects the skylake-avx512 CPU through attribute group #0. |
| target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" |
| target triple = "x86_64-unknown-linux-gnu" |
| |
| ; End-to-end test for https://github.com/llvm/llvm-project/issues/115595: |
| ; !llvm.access.group must be preserved through the O3 pipeline so that the loop is vectorized. |
| define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %face_cell, ptr noalias noundef %x, ptr noalias noundef %y) #0 { |
| ; CHECK-LABEL: define void @test( |
| ; CHECK-SAME: i32 noundef [[NFACE:%.*]], i32 noundef [[NCELL:%.*]], ptr noalias noundef readonly captures(none) [[FACE_CELL:%.*]], ptr noalias noundef readonly captures(none) [[X:%.*]], ptr noalias noundef captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[NFACE]], 0 |
| ; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]] |
| ; CHECK: [[FOR_BODY_PREHEADER]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64 |
| ; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4 |
| ; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644 |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[INT_TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]] |
| ; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> [[TMP5]] |
| ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> align 8 [[TMP4]], <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[DOUBLE_TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> align 8 [[TMP6]], <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]] |
| ; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> align 8 [[TMP4]], <4 x i1> splat (i1 true)), !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4 |
| ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]] |
| ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]] |
| ; CHECK: [[FOR_BODY_PREHEADER14]]: |
| ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER]], %[[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_COND_CLEANUP]]: |
| ; CHECK-NEXT: ret void |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]] |
| ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[IDXPROM3_3:%.*]] = sext i32 [[TMP22]] to i64 |
| ; CHECK-NEXT: [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]] |
| ; CHECK-NEXT: [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64 |
| ; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]] |
| ; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]] |
| ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]] |
| ; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 |
| ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]] |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] |
| ; |
| ; Input summary: for (iface = 0; iface < nface; ++iface) |
| ;   y[il] = *max(&y[il], &x[ir]), with il = face_cell[iface] and ir = face_cell[iface + nface]. |
| ; Every argument and local is kept in an alloca and reloaded on each use. |
| ; The loads, stores and the call inside the loop are all tagged with access group !12. |
| entry: |
| %nface.addr = alloca i32, align 4 |
| %ncell.addr = alloca i32, align 4 |
| %face_cell.addr = alloca ptr, align 8 |
| %x.addr = alloca ptr, align 8 |
| %y.addr = alloca ptr, align 8 |
| %il = alloca i32, align 4 |
| %ir = alloca i32, align 4 |
| %iface = alloca i32, align 4 |
| ; Spill the incoming arguments to their stack slots. |
| store i32 %nface, ptr %nface.addr, align 4, !tbaa !6 |
| store i32 %ncell, ptr %ncell.addr, align 4, !tbaa !6 |
| store ptr %face_cell, ptr %face_cell.addr, align 8, !tbaa !10 |
| store ptr %x, ptr %x.addr, align 8, !tbaa !10 |
| store ptr %y, ptr %y.addr, align 8, !tbaa !10 |
| ; NOTE(review): the lifetime calls reference attribute group #3, which has no |
| ; definition among the `attributes` lines in this file. |
| call void @llvm.lifetime.start.p0(ptr %il) #3 |
| call void @llvm.lifetime.start.p0(ptr %ir) #3 |
| call void @llvm.lifetime.start.p0(ptr %iface) #3 |
| ; iface = 0 |
| store i32 0, ptr %iface, align 4, !tbaa !6 |
| br label %for.cond |
| |
| ; Loop header: keep iterating while iface < nface (signed compare). |
| for.cond: |
| %0 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12 |
| %1 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12 |
| %cmp = icmp slt i32 %0, %1 |
| br i1 %cmp, label %for.body, label %for.cond.cleanup |
| |
| ; Loop exit: end the lifetime of the induction slot %iface. |
| for.cond.cleanup: |
| call void @llvm.lifetime.end.p0(ptr %iface) #3, !llvm.access.group !12 |
| br label %for.end |
| |
| for.body: |
| ; il = face_cell[iface] |
| %2 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12 |
| %3 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12 |
| %idxprom = sext i32 %3 to i64 |
| %arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom |
| %4 = load i32, ptr %arrayidx, align 4, !tbaa !6, !llvm.access.group !12 |
| store i32 %4, ptr %il, align 4, !tbaa !6, !llvm.access.group !12 |
| ; ir = face_cell[iface + nface] |
| %5 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12 |
| %6 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12 |
| %7 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12 |
| %add = add nsw i32 %6, %7 |
| %idxprom1 = sext i32 %add to i64 |
| %arrayidx2 = getelementptr inbounds i32, ptr %5, i64 %idxprom1 |
| %8 = load i32, ptr %arrayidx2, align 4, !tbaa !6, !llvm.access.group !12 |
| store i32 %8, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12 |
| ; %arrayidx4 = &y[il], %arrayidx6 = &x[ir] |
| %9 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12 |
| %10 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12 |
| %idxprom3 = sext i32 %10 to i64 |
| %arrayidx4 = getelementptr inbounds double, ptr %9, i64 %idxprom3 |
| %11 = load ptr, ptr %x.addr, align 8, !tbaa !10, !llvm.access.group !12 |
| %12 = load i32, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12 |
| %idxprom5 = sext i32 %12 to i64 |
| %arrayidx6 = getelementptr inbounds double, ptr %11, i64 %idxprom5 |
| ; y[il] = *max(&y[il], &x[ir]); the call itself also carries access group !12. |
| %call = call noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %arrayidx4, ptr noundef nonnull align 8 dereferenceable(8) %arrayidx6), !llvm.access.group !12 |
| %13 = load double, ptr %call, align 8, !tbaa !13, !llvm.access.group !12 |
| %14 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12 |
| %15 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12 |
| %idxprom7 = sext i32 %15 to i64 |
| %arrayidx8 = getelementptr inbounds double, ptr %14, i64 %idxprom7 |
| store double %13, ptr %arrayidx8, align 8, !tbaa !13, !llvm.access.group !12 |
| br label %for.inc |
| |
| ; ++iface; the back-edge carries loop metadata !15 (parallel_accesses on !12, vectorize.enable). |
| for.inc: |
| %16 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12 |
| %inc = add nsw i32 %16, 1 |
| store i32 %inc, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12 |
| br label %for.cond, !llvm.loop !15 |
| |
| ; End the lifetimes of the remaining scratch slots %ir and %il, then return. |
| for.end: |
| call void @llvm.lifetime.end.p0(ptr %ir) #3 |
| call void @llvm.lifetime.end.p0(ptr %il) #3 |
| ret void |
| } |
| |
| ; Lifetime-start intrinsic; @test calls it on the %il, %ir and %iface stack slots. |
| declare void @llvm.lifetime.start.p0(ptr nocapture) #1 |
| |
| ; Returns the pointer %__b when *%__a < *%__b (fast-math ordered compare), otherwise %__a. |
| ; Called from the loop body of @test. |
| ; NOTE(review): attribute group #2 is referenced below but is not defined by any |
| ; `attributes` line in this file. |
| define linkonce_odr noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %__a, ptr noundef nonnull align 8 dereferenceable(8) %__b) #2 { |
| entry: |
| %retval = alloca ptr, align 8 |
| %__a.addr = alloca ptr, align 8 |
| %__b.addr = alloca ptr, align 8 |
| ; Spill both pointer arguments to stack slots. |
| store ptr %__a, ptr %__a.addr, align 8, !tbaa !10 |
| store ptr %__b, ptr %__b.addr, align 8, !tbaa !10 |
| ; Load the two doubles being compared: %1 = *__a, %3 = *__b. |
| %0 = load ptr, ptr %__a.addr, align 8, !tbaa !10 |
| %1 = load double, ptr %0, align 8, !tbaa !13 |
| %2 = load ptr, ptr %__b.addr, align 8, !tbaa !10 |
| %3 = load double, ptr %2, align 8, !tbaa !13 |
| %cmp = fcmp fast olt double %1, %3 |
| br i1 %cmp, label %if.then, label %if.end |
| |
| ; *__a < *__b: the result is __b. |
| if.then: |
| %4 = load ptr, ptr %__b.addr, align 8, !tbaa !10 |
| store ptr %4, ptr %retval, align 8 |
| br label %return |
| |
| ; Otherwise the result is __a. |
| if.end: |
| %5 = load ptr, ptr %__a.addr, align 8, !tbaa !10 |
| store ptr %5, ptr %retval, align 8 |
| br label %return |
| |
| ; Single exit: return the pointer stored in %retval. |
| return: |
| %6 = load ptr, ptr %retval, align 8 |
| ret ptr %6 |
| } |
| |
| ; Lifetime-end intrinsic; @test calls it on %iface at loop exit and on %ir and %il before returning. |
| declare void @llvm.lifetime.end.p0(ptr nocapture) #1 |
| |
| ; #0 is attached to @test; #1 is attached to the two lifetime intrinsic declarations. |
| attributes #0 = { mustprogress "target-cpu" = "skylake-avx512" } |
| attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } |
| |
| ; TBAA access tags: !6 = int, !10 = any pointer, !13 = double. |
| ; All three descend from "omnipotent char" (!8) under the root !9. |
| !6 = !{!7, !7, i64 0} |
| !7 = !{!"int", !8, i64 0} |
| !8 = !{!"omnipotent char", !9, i64 0} |
| !9 = !{!"Simple C++ TBAA"} |
| !10 = !{!11, !11, i64 0} |
| !11 = !{!"any pointer", !8, i64 0} |
| ; !12 is the access group attached to the memory accesses and the call in @test's loop. |
| !12 = distinct !{} |
| !13 = !{!14, !14, i64 0} |
| !14 = !{!"double", !8, i64 0} |
| ; !15 is the loop metadata on the back-edge of @test's loop: mustprogress, |
| ; parallel accesses for group !12, and vectorize.enable set to true. |
| !15 = distinct !{!15, !16, !17, !18} |
| !16 = !{!"llvm.loop.mustprogress"} |
| !17 = !{!"llvm.loop.parallel_accesses", !12} |
| !18 = !{!"llvm.loop.vectorize.enable", i1 true} |
| |
| ;. |
| ; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} |
| ; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0} |
| ; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} |
| ; CHECK: [[META3]] = !{!"Simple C++ TBAA"} |
| ; CHECK: [[ACC_GRP4]] = distinct !{} |
| ; CHECK: [[DOUBLE_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} |
| ; CHECK: [[META6]] = !{!"double", [[META2]], i64 0} |
| ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]} |
| ; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"} |
| ; CHECK: [[META9]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]} |
| ; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1} |
| ; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"} |
| ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META8]], [[META9]], [[META11]], [[META10]]} |
| ;. |