| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 |
| ; RUN: opt -p loop-vectorize -force-ordered-reductions -mcpu=tigerlake -S %s | FileCheck %s |
| |
| target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" |
| target triple = "x86_64-apple-macosx15.0.0" |
| |
| define float @ordered_reduction_epilogue(ptr %p, i64 %n) "prefer-vector-width"="512" { |
| ; CHECK-LABEL: define float @ordered_reduction_epilogue( |
| ; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { |
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] |
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 64 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 64 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP3:%.*]] = phi float [ 0.000000e+00, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[TMP0]], i64 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP0]], i64 32 |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, ptr [[TMP0]], i64 48 |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP0]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP2]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x float>, ptr [[TMP10]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP3]], <16 x float> [[WIDE_LOAD1]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP4]], <16 x float> [[WIDE_LOAD2]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP5]], <16 x float> [[WIDE_LOAD3]]) |
| ; CHECK-NEXT: [[TMP9]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD4]]) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 |
| ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] |
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 |
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_PH]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP9]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[N_MOD_VF5:%.*]] = urem i64 [[N]], 8 |
| ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF5]] |
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] |
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], %[[VEC_EPILOG_PH]] ], [ [[BOUND:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI8:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP11:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]] |
| ; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x float>, ptr [[TMP7]], align 4 |
| ; CHECK-NEXT: [[TMP11]] = call float @llvm.vector.reduce.fadd.v8f32(float [[VEC_PHI8]], <8 x float> [[WIDE_LOAD9]]) |
| ; CHECK-NEXT: [[BOUND]] = add nuw i64 [[VEC_EPILOG_RESUME_VAL]], 8 |
| ; CHECK-NEXT: [[CMP_N7:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC5]] |
| ; CHECK-NEXT: br i1 [[CMP_N7]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC5]] |
| ; CHECK-NEXT: br i1 [[CMP_N11]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] |
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX12:%.*]] = phi float [ [[TMP11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP9]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX12]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[FADD:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[P]], i64 [[IV]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[GEP]], align 4 |
| ; CHECK-NEXT: [[FADD]] = fadd float [[RED]], [[LOAD]] |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] |
| ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[FADD]], %[[LOOP]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ [[TMP11]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret float [[RES]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %red = phi float [ 0.0, %entry ], [ %fadd, %loop ] |
| %gep = getelementptr float, ptr %p, i64 %iv |
| %load = load float, ptr %gep, align 4 |
| %fadd = fadd float %red, %load |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %cond = icmp eq i64 %iv.next, %n |
| br i1 %cond, label %exit, label %loop |
| |
| exit: |
| %res = phi float [ %fadd, %loop ] |
| ret float %res |
| } |
| |
| define float @ordered_reduction_epilogue_single_vector_iterations(ptr %p, i64 %n) "prefer-vector-width"="512" { |
| ; CHECK-LABEL: define float @ordered_reduction_epilogue_single_vector_iterations( |
| ; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: |
| ; CHECK-NEXT: [[BOUND:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 16) |
| ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] |
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: |
| ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[P]], i64 32 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[P]], i64 48 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[P]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP2]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP3]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[WIDE_LOAD]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP4]], <16 x float> [[WIDE_LOAD1]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP5]], <16 x float> [[WIDE_LOAD2]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]]) |
| ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] |
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: |
| ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_PH]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 64, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] |
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP10:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX4]] |
| ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[TMP9]], align 4 |
| ; CHECK-NEXT: [[TMP10]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI5]], <16 x float> [[WIDE_LOAD6]]) |
| ; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 16 |
| ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 80 |
| ; CHECK-NEXT: br i1 [[TMP11]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br i1 true, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] |
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 80, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 64, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX8]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[FADD:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[P]], i64 [[IV]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[GEP]], align 4 |
| ; CHECK-NEXT: [[FADD]] = fadd float [[RED]], [[LOAD]] |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 80 |
| ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[FADD]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret float [[RES]] |
| ; |
| entry: |
| %bound = call i64 @llvm.umin.i64(i64 %n, i64 16) |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %red = phi float [ 0.0, %entry ], [ %fadd, %loop ] |
| %gep = getelementptr float, ptr %p, i64 %iv |
| %load = load float, ptr %gep, align 4 |
| %fadd = fadd float %red, %load |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %cond = icmp eq i64 %iv.next, 80 |
| br i1 %cond, label %exit, label %loop |
| |
| exit: |
| %res = phi float [ %fadd, %loop ] |
| ret float %res |
| } |
| |
| define float @ordered_reduction_nonzero_start_single_iteration_vector_loops(ptr %p, i64 %n) "prefer-vector-width"="512" { |
| ; CHECK-LABEL: define float @ordered_reduction_nonzero_start_single_iteration_vector_loops( |
| ; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: |
| ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] |
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: |
| ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[P]], i64 16 |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[P]], i64 32 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[P]], i64 48 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[P]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[TMP1]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP2]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x float>, ptr [[TMP3]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 4.200000e+01, <16 x float> [[WIDE_LOAD]]) |
| ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP4]], <16 x float> [[WIDE_LOAD1]]) |
| ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP5]], <16 x float> [[WIDE_LOAD2]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float [[TMP6]], <16 x float> [[WIDE_LOAD3]]) |
| ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] |
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: |
| ; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF7]] |
| ; CHECK: [[VEC_EPILOG_PH]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 64, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 4.200000e+01, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] |
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT7:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI5:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP10:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX4]] |
| ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[TMP9]], align 4 |
| ; CHECK-NEXT: [[TMP10]] = call float @llvm.vector.reduce.fadd.v16f32(float [[VEC_PHI5]], <16 x float> [[WIDE_LOAD6]]) |
| ; CHECK-NEXT: [[INDEX_NEXT7]] = add nuw i64 [[INDEX4]], 16 |
| ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT7]], 80 |
| ; CHECK-NEXT: br i1 [[TMP11]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br i1 true, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] |
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 80, %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 64, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi float [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 4.200000e+01, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX8]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[FADD:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[P]], i64 [[IV]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[GEP]], align 4 |
| ; CHECK-NEXT: [[FADD]] = fadd float [[RED]], [[LOAD]] |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], 80 |
| ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[FADD]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[TMP10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret float [[RES]] |
| ; |
| entry: |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %red = phi float [ 42.0, %entry ], [ %fadd, %loop ] |
| %gep = getelementptr float, ptr %p, i64 %iv |
| %load = load float, ptr %gep, align 4 |
| %fadd = fadd float %red, %load |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %cond = icmp eq i64 %iv.next, 80 |
| br i1 %cond, label %exit, label %loop |
| |
| exit: |
| %res = phi float [ %fadd, %loop ] |
| ret float %res |
| } |
| |
| define float @ordered_reduction_epilogue_dead_main_loop(ptr %p, i64 %n) "prefer-vector-width"="512" { |
| ; CHECK-LABEL: define float @ordered_reduction_epilogue_dead_main_loop( |
| ; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: |
| ; CHECK-NEXT: [[BOUND:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 16) |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[BOUND]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] |
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[BOUND]], 16 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[BOUND]], 16 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[P]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[WIDE_LOAD]]) |
| ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] |
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF13:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_PH]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[BOUND]], 4 |
| ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF4]] |
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] |
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]] |
| ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 |
| ; CHECK-NEXT: [[TMP2]] = call float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[WIDE_LOAD4]]) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[VEC_EPILOG_RESUME_VAL]], 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC5]] |
| ; CHECK-NEXT: br i1 [[TMP3]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC5]] |
| ; CHECK-NEXT: br i1 [[CMP_N10]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] |
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi float [ [[TMP2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX6]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[FADD:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[P]], i64 [[IV]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[GEP]], align 4 |
| ; CHECK-NEXT: [[FADD]] = fadd float [[RED]], [[LOAD]] |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[BOUND]] |
| ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[FADD]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret float [[RES]] |
| ; |
| entry: |
| %bound = call i64 @llvm.umin.i64(i64 %n, i64 16) |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %red = phi float [ 0.0, %entry ], [ %fadd, %loop ] |
| %gep = getelementptr float, ptr %p, i64 %iv |
| %load = load float, ptr %gep, align 4 |
| %fadd = fadd float %red, %load |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %cond = icmp eq i64 %iv.next, %bound |
| br i1 %cond, label %exit, label %loop |
| |
| exit: |
| %res = phi float [ %fadd, %loop ] |
| ret float %res |
| } |
| |
| ; Same as above but with a non-zero start value for the reduction. |
| define float @ordered_reduction_nonzero_start_dead_main_vector_loop(ptr %p, i64 %n) "prefer-vector-width"="512" { |
| ; CHECK-LABEL: define float @ordered_reduction_nonzero_start_dead_main_vector_loop( |
| ; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: |
| ; CHECK-NEXT: [[BOUND:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 16) |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[BOUND]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] |
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[BOUND]], 16 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[BOUND]], 16 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[P]], align 4 |
| ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 4.200000e+01, <16 x float> [[WIDE_LOAD]]) |
| ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] |
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF13]] |
| ; CHECK: [[VEC_EPILOG_PH]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL1:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 4.200000e+01, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[N_MOD_VF4:%.*]] = urem i64 [[BOUND]], 4 |
| ; CHECK-NEXT: [[N_VEC5:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF4]] |
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] |
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL1]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL]] |
| ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4 |
| ; CHECK-NEXT: [[TMP2]] = call float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[WIDE_LOAD4]]) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[VEC_EPILOG_RESUME_VAL]], 4 |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC5]] |
| ; CHECK-NEXT: br i1 [[TMP3]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N10:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC5]] |
| ; CHECK-NEXT: br i1 [[CMP_N10]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] |
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi float [ [[TMP2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP4]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 4.200000e+01, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RED:%.*]] = phi float [ [[BC_MERGE_RDX6]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[FADD:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[P]], i64 [[IV]] |
| ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[GEP]], align 4 |
| ; CHECK-NEXT: [[FADD]] = fadd float [[RED]], [[LOAD]] |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[BOUND]] |
| ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[RES:%.*]] = phi float [ [[FADD]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ [[TMP2]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret float [[RES]] |
| ; |
| entry: |
| %bound = call i64 @llvm.umin.i64(i64 %n, i64 16) |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %red = phi float [ 42.0, %entry ], [ %fadd, %loop ] |
| %gep = getelementptr float, ptr %p, i64 %iv |
| %load = load float, ptr %gep, align 4 |
| %fadd = fadd float %red, %load |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %cond = icmp eq i64 %iv.next, %bound |
| br i1 %cond, label %exit, label %loop |
| |
| exit: |
| %res = phi float [ %fadd, %loop ] |
| ret float %res |
| } |
| |
| ; Two ordered reductions with different start values in the same loop. |
| define { float, float } @two_ordered_reductions(ptr %p, ptr %q, i64 %n) "prefer-vector-width"="512" { |
| ; CHECK-LABEL: define { float, float } @two_ordered_reductions( |
| ; CHECK-SAME: ptr [[P:%.*]], ptr [[Q:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { |
| ; CHECK-NEXT: [[ITER_CHECK:.*]]: |
| ; CHECK-NEXT: [[BOUND:%.*]] = call i64 @llvm.umin.i64(i64 [[N]], i64 16) |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[BOUND]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]] |
| ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[BOUND]], 16 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[BOUND]], 16 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x float>, ptr [[P]], align 4 |
| ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[Q]], align 4 |
| ; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> [[WIDE_LOAD1]]) |
| ; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fadd.v16f32(float 1.000000e+00, <16 x float> [[WIDE_LOAD6]]) |
| ; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]] |
| ; CHECK: [[VEC_EPILOG_ITER_CHECK]]: |
| ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4 |
| ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF13]] |
| ; CHECK: [[VEC_EPILOG_PH]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX3:%.*]] = phi float [ [[TMP12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ] |
| ; CHECK-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[BOUND]], 4 |
| ; CHECK-NEXT: [[N_VEC10:%.*]] = sub i64 [[BOUND]], [[N_MOD_VF9]] |
| ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]] |
| ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]: |
| ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[VEC_EPILOG_PH]] ], [ [[TMP4:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi float [ [[BC_MERGE_RDX3]], %[[VEC_EPILOG_PH]] ], [ [[TMP5:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr float, ptr [[P]], i64 [[VEC_EPILOG_RESUME_VAL1]] |
| ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4 |
| ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr float, ptr [[Q]], i64 [[VEC_EPILOG_RESUME_VAL1]] |
| ; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <4 x float>, ptr [[TMP15]], align 4 |
| ; CHECK-NEXT: [[TMP4]] = call float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[WIDE_LOAD7]]) |
| ; CHECK-NEXT: [[TMP5]] = call float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI6]], <4 x float> [[WIDE_LOAD8]]) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[VEC_EPILOG_RESUME_VAL1]], 4 |
| ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC10]] |
| ; CHECK-NEXT: br i1 [[TMP6]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] |
| ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N13:%.*]] = icmp eq i64 [[BOUND]], [[N_VEC10]] |
| ; CHECK-NEXT: br i1 [[CMP_N13]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]] |
| ; CHECK: [[VEC_EPILOG_SCALAR_PH]]: |
| ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi float [ [[TMP4]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP7]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: [[BC_MERGE_RDX11:%.*]] = phi float [ [[TMP5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP12]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 1.000000e+00, %[[ITER_CHECK]] ] |
| ; CHECK-NEXT: br label %[[LOOP:.*]] |
| ; CHECK: [[LOOP]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RED1:%.*]] = phi float [ [[BC_MERGE_RDX10]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[FADD1:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[RED2:%.*]] = phi float [ [[BC_MERGE_RDX11]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[FADD2:%.*]], %[[LOOP]] ] |
| ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr float, ptr [[P]], i64 [[IV]] |
| ; CHECK-NEXT: [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4 |
| ; CHECK-NEXT: [[FADD1]] = fadd float [[RED1]], [[LOAD1]] |
| ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr float, ptr [[Q]], i64 [[IV]] |
| ; CHECK-NEXT: [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4 |
| ; CHECK-NEXT: [[FADD2]] = fadd float [[RED2]], [[LOAD2]] |
| ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[BOUND]] |
| ; CHECK-NEXT: br i1 [[COND]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[R1:%.*]] = phi float [ [[FADD1]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[TMP4]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: [[R2:%.*]] = phi float [ [[FADD2]], %[[LOOP]] ], [ [[TMP12]], %[[MIDDLE_BLOCK]] ], [ [[TMP5]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: [[RET:%.*]] = insertvalue { float, float } undef, float [[R1]], 0 |
| ; CHECK-NEXT: [[RET2:%.*]] = insertvalue { float, float } [[RET]], float [[R2]], 1 |
| ; CHECK-NEXT: ret { float, float } [[RET2]] |
| ; |
| entry: |
| %bound = call i64 @llvm.umin.i64(i64 %n, i64 16) |
| br label %loop |
| |
| loop: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] |
| %red1 = phi float [ 0.0, %entry ], [ %fadd1, %loop ] |
| %red2 = phi float [ 1.0, %entry ], [ %fadd2, %loop ] |
| %gep1 = getelementptr float, ptr %p, i64 %iv |
| %load1 = load float, ptr %gep1, align 4 |
| %fadd1 = fadd float %red1, %load1 |
| %gep2 = getelementptr float, ptr %q, i64 %iv |
| %load2 = load float, ptr %gep2, align 4 |
| %fadd2 = fadd float %red2, %load2 |
| %iv.next = add nuw nsw i64 %iv, 1 |
| %cond = icmp eq i64 %iv.next, %bound |
| br i1 %cond, label %exit, label %loop |
| |
| exit: |
| %r1 = phi float [ %fadd1, %loop ] |
| %r2 = phi float [ %fadd2, %loop ] |
| %ret = insertvalue { float, float } undef, float %r1, 0 |
| %ret2 = insertvalue { float, float } %ret, float %r2, 1 |
| ret { float, float } %ret2 |
| } |
| |