| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt < %s -O3 -S | FileCheck %s |
| ; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s |
| |
| ; Test that IR is optimal after vectorization/unrolling/CSE/canonicalization. |
| ; In particular, there should be no fdivs inside loops because that is expensive. |
| |
| ; TODO: There is a CSE opportunity to reduce the hoisted fdivs after vectorization/unrolling. |
| ; PR46115 - https://llvm.org/PR46115 |
| |
| target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" |
| target triple = "x86_64-apple-macosx10.15.0" |
| |
| define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 { |
| ; CHECK-LABEL: define void @vdiv( |
| ; CHECK-SAME: ptr writeonly captures(none) [[X:%.*]], ptr readonly captures(none) [[Y:%.*]], double [[A:%.*]], i32 [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N]], 0 |
| ; CHECK-NEXT: br i1 [[CMP1]], label %[[FOR_BODY_PREHEADER:.*]], label %[[FOR_END:.*]] |
| ; CHECK: [[FOR_BODY_PREHEADER]]: |
| ; CHECK-NEXT: [[X4:%.*]] = ptrtoint ptr [[X]] to i64 |
| ; CHECK-NEXT: [[Y5:%.*]] = ptrtoint ptr [[Y]] to i64 |
| ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64 |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 |
| ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[X4]], [[Y5]] |
| ; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 128 |
| ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]] |
| ; CHECK-NEXT: br i1 [[OR_COND]], label %[[FOR_BODY_PREHEADER9:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK6:%.*]] = icmp ult i32 [[N]], 16 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK6]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH1:.*]] |
| ; CHECK: [[VECTOR_PH1]]: |
| ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 12 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632 |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH1]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 32 |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 64 |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP5]], i64 96 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP5]], align 8, !tbaa [[DOUBLE_TBAA3:![0-9]+]] |
| ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP6]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x double>, ptr [[TMP7]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x double>, ptr [[TMP8]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <4 x double> [[WIDE_LOAD]], [[TMP1]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = fmul fast <4 x double> [[WIDE_LOAD6]], [[TMP2]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <4 x double> [[WIDE_LOAD7]], [[TMP3]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = fmul fast <4 x double> [[WIDE_LOAD8]], [[TMP4]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 32 |
| ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 64 |
| ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP13]], i64 96 |
| ; CHECK-NEXT: store <4 x double> [[TMP9]], ptr [[TMP13]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: store <4 x double> [[TMP10]], ptr [[TMP14]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: store <4 x double> [[TMP11]], ptr [[TMP15]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: store <4 x double> [[TMP12]], ptr [[TMP16]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 |
| ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END]], label %[[VEC_EPILOG_ITER_CHECK:.*]] |
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0 |
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER9]], label %[[VEC_EPILOG_PH]] |
| ; CHECK: [[VEC_EPILOG_PH]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_PH]] ] |
| ; CHECK-NEXT: [[N_VEC11:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644 |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x double> poison, double [[A]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT14]], <4 x double> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP38:%.*]] = fdiv fast <4 x double> splat (double 1.000000e+00), [[BROADCAST_SPLAT15]] |
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] |
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDEX12]] |
| ; CHECK-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x double>, ptr [[TMP39]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP40:%.*]] = fmul fast <4 x double> [[WIDE_LOAD13]], [[TMP38]] |
| ; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDEX12]] |
| ; CHECK-NEXT: store <4 x double> [[TMP40]], ptr [[TMP41]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDEX_NEXT16]] = add nuw i64 [[INDEX12]], 4 |
| ; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT16]], [[N_VEC11]] |
| ; CHECK-NEXT: br i1 [[TMP42]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[N_VEC11]], [[WIDE_TRIP_COUNT]] |
| ; CHECK-NEXT: br i1 [[CMP_N17]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9]] |
| ; CHECK: [[FOR_BODY_PREHEADER9]]: |
| ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[N_VEC11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: [[TMP43:%.*]] = sub nsw i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] |
| ; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP43]], 7 |
| ; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 |
| ; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT:.*]], label %[[FOR_BODY_PROL_PREHEADER:.*]] |
| ; CHECK: [[FOR_BODY_PROL_PREHEADER]]: |
| ; CHECK-NEXT: [[TMP18:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: br label %[[FOR_BODY_PROL:.*]] |
| ; CHECK: [[FOR_BODY_PROL]]: |
| ; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], %[[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], %[[FOR_BODY_PROL_PREHEADER]] ] |
| ; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_NEXT:%.*]], %[[FOR_BODY_PROL]] ], [ 0, %[[FOR_BODY_PROL_PREHEADER]] ] |
| ; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_PROL]] |
| ; CHECK-NEXT: [[T0_PROL:%.*]] = load double, ptr [[ARRAYIDX_PROL]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP19:%.*]] = fmul fast double [[T0_PROL]], [[TMP18]] |
| ; CHECK-NEXT: [[ARRAYIDX2_PROL:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_PROL]] |
| ; CHECK-NEXT: store double [[TMP19]], ptr [[ARRAYIDX2_PROL]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_PROL]] = add nuw nsw i64 [[INDVARS_IV_PROL]], 1 |
| ; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1 |
| ; CHECK-NEXT: [[PROL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[PROL_ITER_NEXT]], [[XTRAITER]] |
| ; CHECK-NEXT: br i1 [[PROL_ITER_CMP_NOT]], label %[[FOR_BODY_PROL_LOOPEXIT]], label %[[FOR_BODY_PROL]], !llvm.loop [[LOOP11:![0-9]+]] |
| ; CHECK: [[FOR_BODY_PROL_LOOPEXIT]]: |
| ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], %[[FOR_BODY_PREHEADER9]] ], [ [[INDVARS_IV_NEXT_PROL]], %[[FOR_BODY_PROL]] ] |
| ; CHECK-NEXT: [[TMP20:%.*]] = sub nsw i64 [[INDVARS_IV_PH]], [[WIDE_TRIP_COUNT]] |
| ; CHECK-NEXT: [[TMP21:%.*]] = icmp ugt i64 [[TMP20]], -8 |
| ; CHECK-NEXT: br i1 [[TMP21]], label %[[FOR_END]], label %[[FOR_BODY_PREHEADER9_NEW:.*]] |
| ; CHECK: [[FOR_BODY_PREHEADER9_NEW]]: |
| ; CHECK-NEXT: [[TMP22:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: [[TMP23:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: [[TMP24:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: [[TMP25:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: [[TMP26:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: [[TMP27:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: [[TMP28:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: [[TMP29:%.*]] = fdiv fast double 1.000000e+00, [[A]] |
| ; CHECK-NEXT: br label %[[FOR_BODY:.*]] |
| ; CHECK: [[FOR_BODY]]: |
| ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], %[[FOR_BODY_PREHEADER9_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], %[[FOR_BODY]] ] |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV]] |
| ; CHECK-NEXT: [[T0:%.*]] = load double, ptr [[ARRAYIDX]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP30:%.*]] = fmul fast double [[T0]], [[TMP22]] |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV]] |
| ; CHECK-NEXT: store double [[TMP30]], ptr [[ARRAYIDX2]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 |
| ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT]] |
| ; CHECK-NEXT: [[T0_1:%.*]] = load double, ptr [[ARRAYIDX_1]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP31:%.*]] = fmul fast double [[T0_1]], [[TMP23]] |
| ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT]] |
| ; CHECK-NEXT: store double [[TMP31]], ptr [[ARRAYIDX2_1]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 2 |
| ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_1]] |
| ; CHECK-NEXT: [[T0_2:%.*]] = load double, ptr [[ARRAYIDX_2]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP32:%.*]] = fmul fast double [[T0_2]], [[TMP24]] |
| ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_1]] |
| ; CHECK-NEXT: store double [[TMP32]], ptr [[ARRAYIDX2_2]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 3 |
| ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_2]] |
| ; CHECK-NEXT: [[T0_3:%.*]] = load double, ptr [[ARRAYIDX_3]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP33:%.*]] = fmul fast double [[T0_3]], [[TMP25]] |
| ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_2]] |
| ; CHECK-NEXT: store double [[TMP33]], ptr [[ARRAYIDX2_3]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 4 |
| ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_3]] |
| ; CHECK-NEXT: [[T0_4:%.*]] = load double, ptr [[ARRAYIDX_4]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP34:%.*]] = fmul fast double [[T0_4]], [[TMP26]] |
| ; CHECK-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_3]] |
| ; CHECK-NEXT: store double [[TMP34]], ptr [[ARRAYIDX2_4]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 5 |
| ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_4]] |
| ; CHECK-NEXT: [[T0_5:%.*]] = load double, ptr [[ARRAYIDX_5]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP35:%.*]] = fmul fast double [[T0_5]], [[TMP27]] |
| ; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_4]] |
| ; CHECK-NEXT: store double [[TMP35]], ptr [[ARRAYIDX2_5]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 6 |
| ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_5]] |
| ; CHECK-NEXT: [[T0_6:%.*]] = load double, ptr [[ARRAYIDX_6]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP36:%.*]] = fmul fast double [[T0_6]], [[TMP28]] |
| ; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_5]] |
| ; CHECK-NEXT: store double [[TMP36]], ptr [[ARRAYIDX2_6]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 7 |
| ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw double, ptr [[Y]], i64 [[INDVARS_IV_NEXT_6]] |
| ; CHECK-NEXT: [[T0_7:%.*]] = load double, ptr [[ARRAYIDX_7]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[TMP37:%.*]] = fmul fast double [[T0_7]], [[TMP29]] |
| ; CHECK-NEXT: [[ARRAYIDX2_7:%.*]] = getelementptr inbounds nuw double, ptr [[X]], i64 [[INDVARS_IV_NEXT_6]] |
| ; CHECK-NEXT: store double [[TMP37]], ptr [[ARRAYIDX2_7]], align 8, !tbaa [[DOUBLE_TBAA3]] |
| ; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add nuw nsw i64 [[INDVARS_IV]], 8 |
| ; CHECK-NEXT: [[EXITCOND_NOT_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[WIDE_TRIP_COUNT]] |
| ; CHECK-NEXT: br i1 [[EXITCOND_NOT_7]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] |
| ; CHECK: [[FOR_END]]: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| %div = fdiv fast double 1.0, %a |
| br label %for.cond |
| |
| for.cond: |
| %n.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] |
| %cmp = icmp slt i32 %n.0, %N |
| br i1 %cmp, label %for.body, label %for.cond.cleanup |
| |
| for.cond.cleanup: |
| br label %for.end |
| |
| for.body: |
| %idxprom = sext i32 %n.0 to i64 |
| %arrayidx = getelementptr inbounds double, ptr %y, i64 %idxprom |
| %t0 = load double, ptr %arrayidx, align 8, !tbaa !3 |
| %mul = fmul fast double %t0, %div |
| %idxprom1 = sext i32 %n.0 to i64 |
| %arrayidx2 = getelementptr inbounds double, ptr %x, i64 %idxprom1 |
| store double %mul, ptr %arrayidx2, align 8, !tbaa !3 |
| br label %for.inc |
| |
| for.inc: |
| %inc = add nsw i32 %n.0, 1 |
| br label %for.cond |
| |
| for.end: |
| ret void |
| } |
| |
| attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="skylake-avx512" "target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves" "unsafe-fp-math"="true" "use-soft-float"="false" } |
| |
| !llvm.module.flags = !{!0, !1} |
| !llvm.ident = !{!2} |
| |
| !0 = !{i32 1, !"wchar_size", i32 4} |
| !1 = !{i32 7, !"PIC Level", i32 2} |
| !2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 45ebe38ffc40bb7221fc587bfb4481cf7f53ebbc)"} |
| !3 = !{!4, !4, i64 0} |
| !4 = !{!"double", !5, i64 0} |
| !5 = !{!"omnipotent char", !6, i64 0} |
| !6 = !{!"Simple C/C++ TBAA"} |
| |
| ;. |
| ; CHECK: [[DOUBLE_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} |
| ; CHECK: [[META4]] = !{!"double", [[META5:![0-9]+]], i64 0} |
| ; CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0} |
| ; CHECK: [[META6]] = !{!"Simple C/C++ TBAA"} |
| ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META8:![0-9]+]], [[META9:![0-9]+]]} |
| ; CHECK: [[META8]] = !{!"llvm.loop.isvectorized", i32 1} |
| ; CHECK: [[META9]] = !{!"llvm.loop.unroll.runtime.disable"} |
| ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META8]], [[META9]]} |
| ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META12:![0-9]+]]} |
| ; CHECK: [[META12]] = !{!"llvm.loop.unroll.disable"} |
| ; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META8]]} |
| ;. |