| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -passes=loop-vectorize -mcpu=skylake-avx512 -S %s | FileCheck %s |
| |
| ; Module-level target description. The "ni:10:11:12:13" component of the |
| ; datalayout string declares address spaces 10 through 13 as non-integral. |
| target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" |
| target triple = "x86_64-unknown-linux-gnu" |
| |
| ; External global holding a pointer; @japi1_vect_42283 loads it atomically |
| ; (unordered) and calls the loaded value as a function. |
| @jlplt_ijl_alloc_array_1d_10294_got = external dso_local local_unnamed_addr global ptr |
| |
| ; Loop-vectorizer input: the scalar loop in %L26 stores the two fields of one |
| ; { ptr addrspace(10), i64 } element per iteration into the buffer held in %7. |
| ; The autogenerated assertions below expect a main vector loop that advances |
| ; the index by 16 per iteration using <4 x ...> masked scatters, an epilogue |
| ; vector loop that advances by 4, and a scalar remainder loop. |
| define ptr addrspace(10) @japi1_vect_42283(ptr nocapture readonly %0, i32 %1) local_unnamed_addr #0 { |
| ; CHECK-LABEL: define ptr addrspace(10) @japi1_vect_42283( |
| ; CHECK-SAME: ptr readonly captures(none) [[TMP0:%.*]], i32 [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: |
| ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 |
| ; CHECK-NEXT: [[TMP3:%.*]] = load atomic ptr, ptr @jlplt_ijl_alloc_array_1d_10294_got unordered, align 8 |
| ; CHECK-NEXT: [[TMP4:%.*]] = tail call ptr addrspace(10) [[TMP3]](ptr addrspace(10) null, i64 0) |
| ; CHECK-NEXT: [[TMP5:%.*]] = load ptr addrspace(10), ptr [[TMP0]], align 8, !tbaa [[JTBAA_VALUE_TBAA0:![0-9]+]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(10) [[TMP4]] to ptr addrspace(11) |
| ; CHECK-NEXT: [[TMP7:%.*]] = load ptr addrspace(13), ptr addrspace(11) [[TMP6]], align 8, !tbaa [[JTBAA_ARRAYPTR_TBAA5:![0-9]+]] |
| ; CHECK-NEXT: [[DOTELT:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 0 |
| ; CHECK-NEXT: [[DOTUNPACK:%.*]] = load ptr addrspace(10), ptr addrspace(10) [[DOTELT]], align 8, !tbaa [[JTBAA_IMMUT_TBAA8:![0-9]+]] |
| ; CHECK-NEXT: [[DOTELT1:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) [[TMP5]], i64 0, i32 1 |
| ; CHECK-NEXT: [[DOTUNPACK2:%.*]] = load i64, ptr addrspace(10) [[DOTELT1]], align 8, !tbaa [[JTBAA_IMMUT_TBAA8]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = add nsw i64 [[TMP2]], 1 |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP8]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] |
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: |
| ; CHECK-NEXT: [[TMP17:%.*]] = icmp ult i64 [[TMP8]], 16 |
| ; CHECK-NEXT: br i1 [[TMP17]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP8]], 16 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF]] |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT7]], <4 x i64> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4) |
| ; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4) |
| ; CHECK-NEXT: [[STEP_ADD5:%.*]] = add <4 x i64> [[STEP_ADD4]], splat (i64 4) |
| ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 0 |
| ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 0 |
| ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 0 |
| ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 0 |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> align 8 [[TMP18]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10:![0-9]+]] |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> align 8 [[TMP19]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> align 8 [[TMP20]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT]], <4 x ptr addrspace(13)> align 8 [[TMP21]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND]], i32 1 |
| ; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD]], i32 1 |
| ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD4]], i32 1 |
| ; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[STEP_ADD5]], i32 1 |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> align 8 [[TMP22]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> align 8 [[TMP23]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> align 8 [[TMP24]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT8]], <4 x ptr addrspace(13)> align 8 [[TMP25]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 |
| ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD5]], splat (i64 4) |
| ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[L44:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] |
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF15:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_PH]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[TMP8]], 4 |
| ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[TMP8]], [[N_MOD_VF4]] |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x ptr addrspace(10)> poison, ptr addrspace(10) [[DOTUNPACK]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <4 x ptr addrspace(10)> [[BROADCAST_SPLATINSERT10]], <4 x ptr addrspace(10)> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT12:%.*]] = insertelement <4 x i64> poison, i64 [[DOTUNPACK2]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT13:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT12]], <4 x i64> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[VEC_EPILOG_RESUME_VAL]], i64 0 |
| ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer |
| ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i64> [[DOTSPLAT]], <i64 0, i64 1, i64 2, i64 3> |
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] |
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_IND8:%.*]] = phi <4 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT9:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND8]], i32 0 |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4p10.v4p13(<4 x ptr addrspace(10)> [[BROADCAST_SPLAT11]], <4 x ptr addrspace(13)> align 8 [[TMP28]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], <4 x i64> [[VEC_IND8]], i32 1 |
| ; CHECK-NEXT: call void @llvm.masked.scatter.v4i64.v4p13(<4 x i64> [[BROADCAST_SPLAT13]], <4 x ptr addrspace(13)> align 8 [[TMP29]], <4 x i1> splat (i1 true)), !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: [[INDEX_NEXT14]] = add nuw i64 [[INDEX7]], 4 |
| ; CHECK-NEXT: [[VEC_IND_NEXT9]] = add <4 x i64> [[VEC_IND8]], splat (i64 4) |
| ; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT14]], [[N_VEC5]] |
| ; CHECK-NEXT: br i1 [[TMP30]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N15:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC5]] |
| ; CHECK-NEXT: br i1 [[CMP_N15]], label %[[L44]], label %[[VEC_EPILOG_SCALAR_PH]] |
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[L26:.*]] |
| ; CHECK: [[L26]]: |
| ; CHECK-NEXT: [[VALUE_PHI5:%.*]] = phi i64 [ [[BC_RESUME_VAL17]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[TMP27:%.*]], %[[L26]] ] |
| ; CHECK-NEXT: [[DOTREPACK:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], i64 [[VALUE_PHI5]], i32 0 |
| ; CHECK-NEXT: store ptr addrspace(10) [[DOTUNPACK]], ptr addrspace(13) [[DOTREPACK]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: [[DOTREPACK4:%.*]] = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) [[TMP7]], i64 [[VALUE_PHI5]], i32 1 |
| ; CHECK-NEXT: store i64 [[DOTUNPACK2]], ptr addrspace(13) [[DOTREPACK4]], align 8, !tbaa [[JTBAA_ARRAYBUF_TBAA10]] |
| ; CHECK-NEXT: [[TMP27]] = add i64 [[VALUE_PHI5]], 1 |
| ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[VALUE_PHI5]], [[TMP2]] |
| ; CHECK-NEXT: br i1 [[DOTNOT]], label %[[L44]], label %[[L26]], !llvm.loop [[LOOP17:![0-9]+]] |
| ; CHECK: [[L44]]: |
| ; CHECK-NEXT: ret ptr addrspace(10) null |
| ; |
| top: |
| %2 = sext i32 %1 to i64 |
| ; Load the callee pointer from the external global and call it. Written in |
| ; opaque-pointer form to match the rest of this file and the expected output. |
| %3 = load atomic ptr, ptr @jlplt_ijl_alloc_array_1d_10294_got unordered, align 8 |
| %4 = tail call ptr addrspace(10) %3(ptr addrspace(10) null, i64 0) |
| %5 = load ptr addrspace(10), ptr %0, align 8, !tbaa !0 |
| ; %7 is the destination buffer pointer, loaded from the call result viewed |
| ; through address space 11. |
| %6 = addrspacecast ptr addrspace(10) %4 to ptr addrspace(11) |
| %7 = load ptr addrspace(13), ptr addrspace(11) %6, align 8, !tbaa !5 |
| ; Read both fields of the source pair once, before entering the loop. |
| %.elt = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) %5, i64 0, i32 0 |
| %.unpack = load ptr addrspace(10), ptr addrspace(10) %.elt, align 8, !tbaa !8 |
| %.elt1 = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(10) %5, i64 0, i32 1 |
| %.unpack2 = load i64, ptr addrspace(10) %.elt1, align 8, !tbaa !8 |
| br label %L26 |
| |
| L26: ; preds = %L26, %top |
| %value_phi5 = phi i64 [ 0, %top ], [ %8, %L26 ] |
| ; Store field 0 and field 1 of element %value_phi5 with two separate stores. |
| %.repack = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) %7, i64 %value_phi5, i32 0 |
| store ptr addrspace(10) %.unpack, ptr addrspace(13) %.repack, align 8, !tbaa !10 |
| %.repack4 = getelementptr inbounds { ptr addrspace(10), i64 }, ptr addrspace(13) %7, i64 %value_phi5, i32 1 |
| store i64 %.unpack2, ptr addrspace(13) %.repack4, align 8, !tbaa !10 |
| %8 = add i64 %value_phi5, 1 |
| ; The exit test uses the pre-increment index, so the element at index %2 is |
| ; written before the loop is left. |
| %.not = icmp eq i64 %value_phi5, %2 |
| br i1 %.not, label %L44, label %L26 |
| |
| L44: ; preds = %L26 |
| ret ptr addrspace(10) null |
| } |
| |
| ; TBAA metadata referenced by the loads and stores in @japi1_vect_42283. |
| ; !0, !5, !8 and !10 are access tags; the remaining nodes are type nodes whose |
| ; second operand names the parent type. !4 ("jtbaa") is the root. |
| ; Access tag !0 -> "jtbaa_value" (parent "jtbaa_data", parent "jtbaa"). |
| !0 = !{!1, !1, i64 0} |
| !1 = !{!"jtbaa_value", !2, i64 0} |
| !2 = !{!"jtbaa_data", !3, i64 0} |
| !3 = !{!"jtbaa", !4, i64 0} |
| !4 = !{!"jtbaa"} |
| ; Access tag !5 -> "jtbaa_arrayptr" (parent "jtbaa_array", parent "jtbaa"). |
| !5 = !{!6, !6, i64 0} |
| !6 = !{!"jtbaa_arrayptr", !7, i64 0} |
| !7 = !{!"jtbaa_array", !3, i64 0} |
| ; Access tag !8 -> "jtbaa_immut" (parent "jtbaa_value"). |
| !8 = !{!9, !9, i64 0} |
| !9 = !{!"jtbaa_immut", !1, i64 0} |
| ; Access tag !10 -> "jtbaa_arraybuf" (parent "jtbaa_data"); used by both loop stores. |
| !10 = !{!11, !11, i64 0} |
| !11 = !{!"jtbaa_arraybuf", !2, i64 0} |
| ;. |
| ; CHECK: [[JTBAA_VALUE_TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} |
| ; CHECK: [[META1]] = !{!"jtbaa_value", [[META2:![0-9]+]], i64 0} |
| ; CHECK: [[META2]] = !{!"jtbaa_data", [[META3:![0-9]+]], i64 0} |
| ; CHECK: [[META3]] = !{!"jtbaa", [[META4:![0-9]+]], i64 0} |
| ; CHECK: [[META4]] = !{!"jtbaa"} |
| ; CHECK: [[JTBAA_ARRAYPTR_TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} |
| ; CHECK: [[META6]] = !{!"jtbaa_arrayptr", [[META7:![0-9]+]], i64 0} |
| ; CHECK: [[META7]] = !{!"jtbaa_array", [[META3]], i64 0} |
| ; CHECK: [[JTBAA_IMMUT_TBAA8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} |
| ; CHECK: [[META9]] = !{!"jtbaa_immut", [[META1]], i64 0} |
| ; CHECK: [[JTBAA_ARRAYBUF_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0} |
| ; CHECK: [[META11]] = !{!"jtbaa_arraybuf", [[META2]], i64 0} |
| ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META13:![0-9]+]], [[META14:![0-9]+]]} |
| ; CHECK: [[META13]] = !{!"llvm.loop.isvectorized", i32 1} |
| ; CHECK: [[META14]] = !{!"llvm.loop.unroll.runtime.disable"} |
| ; CHECK: [[PROF15]] = !{!"branch_weights", i32 4, i32 12} |
| ; CHECK: [[LOOP16]] = distinct !{[[LOOP16]], [[META13]], [[META14]]} |
| ; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META14]], [[META13]]} |
| ;. |