blob: d9803697a72653fe1ab89c621e24a90606911828 [file]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes="default<O3>" -S %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Verify that !llvm.access.group is preserved through the O3 pipeline and that the loop is vectorized.
; End-to-end test for https://github.com/llvm/llvm-project/issues/115595.
; @test is unoptimized input IR: every argument and local lives in its own
; alloca and is reloaded before each use. The loop runs iface over [0, nface)
; and, per iteration, reads il = face_cell[iface] and
; ir = face_cell[iface + nface], calls @max on &y[il] and &x[ir], loads a
; double through the returned pointer and stores it back to y[il].
; Every load, store and call in the loop blocks is tagged with
; !llvm.access.group !12, and the back edge carries !llvm.loop !15, which lists
; that group under llvm.loop.parallel_accesses and sets
; llvm.loop.vectorize.enable. The autogenerated assertions below expect a
; vector loop on <4 x ...> values whose wide loads, gathers and scatter still
; carry the access group, followed by a scalar remainder loop.
define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %face_cell, ptr noalias noundef %x, ptr noalias noundef %y) #0 {
; CHECK-LABEL: define void @test(
; CHECK-SAME: i32 noundef [[NFACE:%.*]], i32 noundef [[NCELL:%.*]], ptr noalias noundef readonly captures(none) [[FACE_CELL:%.*]], ptr noalias noundef readonly captures(none) [[X:%.*]], ptr noalias noundef captures(none) [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[NFACE]], 0
; CHECK-NEXT: br i1 [[CMP8]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_COND_CLEANUP:.*]]
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[INT_TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[WIDE_LOAD12]] to <4 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds double, ptr [[X]], <4 x i64> [[TMP5]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> align 8 [[TMP4]], <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[DOUBLE_TBAA5:![0-9]+]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> align 8 [[TMP6]], <4 x i1> splat (i1 true), <4 x double> poison), !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> align 8 [[TMP4]], <4 x i1> splat (i1 true)), !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
; CHECK: [[FOR_BODY_PREHEADER14]]:
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER14]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_NEXT_2]]
; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[GEP]], align 4, !tbaa [[INT_TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[IDXPROM3_3:%.*]] = sext i32 [[TMP22]] to i64
; CHECK-NEXT: [[ARRAYIDX4_3:%.*]] = getelementptr inbounds double, ptr [[Y]], i64 [[IDXPROM3_3]]
; CHECK-NEXT: [[IDXPROM5_3:%.*]] = sext i32 [[TMP23]] to i64
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[IDXPROM5_3]]
; CHECK-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX4_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP25:%.*]] = load double, ptr [[ARRAYIDX6_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[CMP_I_3:%.*]] = fcmp fast olt double [[TMP24]], [[TMP25]]
; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[CMP_I_3]], double [[TMP25]], double [[TMP24]]
; CHECK-NEXT: store double [[TMP26]], ptr [[ARRAYIDX4_3]], align 8, !tbaa [[DOUBLE_TBAA5]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[TMP0]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
;
entry:
; One stack slot per argument and per local (il, ir, iface).
%nface.addr = alloca i32, align 4
%ncell.addr = alloca i32, align 4
%face_cell.addr = alloca ptr, align 8
%x.addr = alloca ptr, align 8
%y.addr = alloca ptr, align 8
%il = alloca i32, align 4
%ir = alloca i32, align 4
%iface = alloca i32, align 4
; Spill the arguments; %ncell.addr is written here but never read in this function.
store i32 %nface, ptr %nface.addr, align 4, !tbaa !6
store i32 %ncell, ptr %ncell.addr, align 4, !tbaa !6
store ptr %face_cell, ptr %face_cell.addr, align 8, !tbaa !10
store ptr %x, ptr %x.addr, align 8, !tbaa !10
store ptr %y, ptr %y.addr, align 8, !tbaa !10
call void @llvm.lifetime.start.p0(ptr %il) #3
call void @llvm.lifetime.start.p0(ptr %ir) #3
call void @llvm.lifetime.start.p0(ptr %iface) #3
; iface = 0
store i32 0, ptr %iface, align 4, !tbaa !6
br label %for.cond
for.cond:
; Loop header: continue while iface < nface (signed compare).
%0 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
%1 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12
%cmp = icmp slt i32 %0, %1
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup:
; Loop exit: the induction variable's slot is dead from here on.
call void @llvm.lifetime.end.p0(ptr %iface) #3, !llvm.access.group !12
br label %for.end
for.body:
; il = face_cell[iface]
%2 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12
%3 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
%idxprom = sext i32 %3 to i64
%arrayidx = getelementptr inbounds i32, ptr %2, i64 %idxprom
%4 = load i32, ptr %arrayidx, align 4, !tbaa !6, !llvm.access.group !12
store i32 %4, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
; ir = face_cell[iface + nface]
%5 = load ptr, ptr %face_cell.addr, align 8, !tbaa !10, !llvm.access.group !12
%6 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
%7 = load i32, ptr %nface.addr, align 4, !tbaa !6, !llvm.access.group !12
%add = add nsw i32 %6, %7
%idxprom1 = sext i32 %add to i64
%arrayidx2 = getelementptr inbounds i32, ptr %5, i64 %idxprom1
%8 = load i32, ptr %arrayidx2, align 4, !tbaa !6, !llvm.access.group !12
store i32 %8, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12
; Form &y[il] and &x[ir] and pass both addresses to @max.
%9 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12
%10 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
%idxprom3 = sext i32 %10 to i64
%arrayidx4 = getelementptr inbounds double, ptr %9, i64 %idxprom3
%11 = load ptr, ptr %x.addr, align 8, !tbaa !10, !llvm.access.group !12
%12 = load i32, ptr %ir, align 4, !tbaa !6, !llvm.access.group !12
%idxprom5 = sext i32 %12 to i64
%arrayidx6 = getelementptr inbounds double, ptr %11, i64 %idxprom5
; The call itself is tagged with the access group; the loads inside @max are not.
%call = call noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %arrayidx4, ptr noundef nonnull align 8 dereferenceable(8) %arrayidx6), !llvm.access.group !12
; y[il] = value read through the pointer returned by @max
%13 = load double, ptr %call, align 8, !tbaa !13, !llvm.access.group !12
%14 = load ptr, ptr %y.addr, align 8, !tbaa !10, !llvm.access.group !12
%15 = load i32, ptr %il, align 4, !tbaa !6, !llvm.access.group !12
%idxprom7 = sext i32 %15 to i64
%arrayidx8 = getelementptr inbounds double, ptr %14, i64 %idxprom7
store double %13, ptr %arrayidx8, align 8, !tbaa !13, !llvm.access.group !12
br label %for.inc
for.inc:
; ++iface; the back edge carries the loop metadata (!15).
%16 = load i32, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
%inc = add nsw i32 %16, 1
store i32 %inc, ptr %iface, align 4, !tbaa !6, !llvm.access.group !12
br label %for.cond, !llvm.loop !15
for.end:
; End the lifetimes of the remaining locals and return.
call void @llvm.lifetime.end.p0(ptr %ir) #3
call void @llvm.lifetime.end.p0(ptr %il) #3
ret void
}
declare void @llvm.lifetime.start.p0(ptr nocapture) #1
; @max takes two pointers to double and returns one of them: %__b when
; *%__a < *%__b (fcmp fast olt), otherwise %__a. It is written in unoptimized
; form: the arguments and the result each go through their own alloca.
; None of the loads in this function carry !llvm.access.group; only the call
; site in @test does, while the expected output for @test shows the double
; loads/gathers tagged with the access group.
; NOTE(review): attribute group #2 is referenced here, but no `attributes #2`
; definition is visible in this file - confirm that is intentional.
define linkonce_odr noundef nonnull align 8 dereferenceable(8) ptr @max(ptr noundef nonnull align 8 dereferenceable(8) %__a, ptr noundef nonnull align 8 dereferenceable(8) %__b) #2 {
entry:
%retval = alloca ptr, align 8
%__a.addr = alloca ptr, align 8
%__b.addr = alloca ptr, align 8
; Spill both pointer arguments to their stack slots.
store ptr %__a, ptr %__a.addr, align 8, !tbaa !10
store ptr %__b, ptr %__b.addr, align 8, !tbaa !10
; Load the two pointed-to doubles and compare them: *__a < *__b.
%0 = load ptr, ptr %__a.addr, align 8, !tbaa !10
%1 = load double, ptr %0, align 8, !tbaa !13
%2 = load ptr, ptr %__b.addr, align 8, !tbaa !10
%3 = load double, ptr %2, align 8, !tbaa !13
%cmp = fcmp fast olt double %1, %3
br i1 %cmp, label %if.then, label %if.end
if.then:
; *__a < *__b: the result is __b.
%4 = load ptr, ptr %__b.addr, align 8, !tbaa !10
store ptr %4, ptr %retval, align 8
br label %return
if.end:
; Otherwise the result is __a.
%5 = load ptr, ptr %__a.addr, align 8, !tbaa !10
store ptr %5, ptr %retval, align 8
br label %return
return:
; Return whichever pointer was stored to the result slot.
%6 = load ptr, ptr %retval, align 8
ret ptr %6
}
declare void @llvm.lifetime.end.p0(ptr nocapture) #1
attributes #0 = { mustprogress "target-cpu" = "skylake-avx512" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
!6 = !{!7, !7, i64 0}
!7 = !{!"int", !8, i64 0}
!8 = !{!"omnipotent char", !9, i64 0}
!9 = !{!"Simple C++ TBAA"}
!10 = !{!11, !11, i64 0}
!11 = !{!"any pointer", !8, i64 0}
!12 = distinct !{}
!13 = !{!14, !14, i64 0}
!14 = !{!"double", !8, i64 0}
!15 = distinct !{!15, !16, !17, !18}
!16 = !{!"llvm.loop.mustprogress"}
!17 = !{!"llvm.loop.parallel_accesses", !12}
!18 = !{!"llvm.loop.vectorize.enable", i1 true}
;.
; CHECK: [[INT_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
; CHECK: [[META1]] = !{!"int", [[META2:![0-9]+]], i64 0}
; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
; CHECK: [[ACC_GRP4]] = distinct !{}
; CHECK: [[DOUBLE_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
; CHECK: [[META6]] = !{!"double", [[META2]], i64 0}
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META11:![0-9]+]]}
; CHECK: [[META8]] = !{!"llvm.loop.mustprogress"}
; CHECK: [[META9]] = !{!"llvm.loop.parallel_accesses", [[ACC_GRP4]]}
; CHECK: [[META10]] = !{!"llvm.loop.isvectorized", i32 1}
; CHECK: [[META11]] = !{!"llvm.loop.unroll.runtime.disable"}
; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META8]], [[META9]], [[META11]], [[META10]]}
;.