; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -p loop-unroll -mcpu=apple-m1 -S %s | FileCheck --check-prefix=APPLE %s
; RUN: opt -p loop-unroll -mcpu=apple-m2 -S %s | FileCheck --check-prefix=APPLE %s
; RUN: opt -p loop-unroll -mcpu=apple-m3 -S %s | FileCheck --check-prefix=APPLE %s
; RUN: opt -p loop-unroll -mcpu=apple-m4 -S %s | FileCheck --check-prefix=APPLE %s
; RUN: opt -p loop-unroll -mcpu=apple-a17 -S %s | FileCheck --check-prefix=APPLE %s
; RUN: opt -p loop-unroll -mcpu=cortex-a57 -S %s | FileCheck --check-prefix=OTHER %s
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-n32:64-S128-Fn32"
target triple = "arm64-apple-macosx15.0.0"
; Single-block loop whose body is one strided load and one store: each
; iteration loads a float from %src at index %iv * %scale and stores it to
; %dst at index %iv, starting at %iv = 0 and leaving once %iv + 1 == %N.
;
; Expected output under the APPLE prefix: the body is repeated 8 times per trip
; of the main loop, and the (N & 7) leftover iterations run in an epilogue loop.
; Expected output under the OTHER prefix: the body is repeated twice, and the
; single possible leftover iteration runs in a straight-line epilogue block.
define void @small_load_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale) {
; APPLE-LABEL: define void @small_load_store_loop(
; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7
; APPLE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7
; APPLE-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
; APPLE: [[ENTRY_NEW]]:
; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; APPLE-NEXT: br label %[[LOOP:.*]]
; APPLE: [[LOOP]]:
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
; APPLE-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
; APPLE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
; APPLE-NEXT: store float [[L]], ptr [[GEP_DST]], align 4
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; APPLE-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
; APPLE-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
; APPLE-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
; APPLE-NEXT: store float [[L_1]], ptr [[GEP_DST_1]], align 4
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; APPLE-NEXT: [[SCALED_IV_2:%.*]] = mul nuw nsw i64 [[IV_NEXT_1]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_2]]
; APPLE-NEXT: [[L_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
; APPLE-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_1]]
; APPLE-NEXT: store float [[L_2]], ptr [[GEP_DST_2]], align 4
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; APPLE-NEXT: [[SCALED_IV_3:%.*]] = mul nuw nsw i64 [[IV_NEXT_2]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_3:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_3]]
; APPLE-NEXT: [[L_3:%.*]] = load float, ptr [[GEP_SRC_3]], align 4
; APPLE-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_2]]
; APPLE-NEXT: store float [[L_3]], ptr [[GEP_DST_3]], align 4
; APPLE-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
; APPLE-NEXT: [[SCALED_IV_4:%.*]] = mul nuw nsw i64 [[IV_NEXT_3]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_4:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_4]]
; APPLE-NEXT: [[L_4:%.*]] = load float, ptr [[GEP_SRC_4]], align 4
; APPLE-NEXT: [[GEP_DST_4:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_3]]
; APPLE-NEXT: store float [[L_4]], ptr [[GEP_DST_4]], align 4
; APPLE-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
; APPLE-NEXT: [[SCALED_IV_5:%.*]] = mul nuw nsw i64 [[IV_NEXT_4]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_5:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_5]]
; APPLE-NEXT: [[L_5:%.*]] = load float, ptr [[GEP_SRC_5]], align 4
; APPLE-NEXT: [[GEP_DST_5:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_4]]
; APPLE-NEXT: store float [[L_5]], ptr [[GEP_DST_5]], align 4
; APPLE-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
; APPLE-NEXT: [[SCALED_IV_6:%.*]] = mul nuw nsw i64 [[IV_NEXT_5]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_6:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_6]]
; APPLE-NEXT: [[L_6:%.*]] = load float, ptr [[GEP_SRC_6]], align 4
; APPLE-NEXT: [[GEP_DST_6:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_5]]
; APPLE-NEXT: store float [[L_6]], ptr [[GEP_DST_6]], align 4
; APPLE-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
; APPLE-NEXT: [[SCALED_IV_7:%.*]] = mul nuw nsw i64 [[IV_NEXT_6]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_7:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_7]]
; APPLE-NEXT: [[L_7:%.*]] = load float, ptr [[GEP_SRC_7]], align 4
; APPLE-NEXT: [[GEP_DST_7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT_6]]
; APPLE-NEXT: store float [[L_7]], ptr [[GEP_DST_7]], align 4
; APPLE-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
; APPLE-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
; APPLE-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
; APPLE-NEXT: br i1 [[NITER_NCMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
; APPLE: [[EXIT_UNR_LCSSA]]:
; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_7]], %[[LOOP]] ]
; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
; APPLE: [[LOOP_EPIL_PREHEADER]]:
; APPLE-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
; APPLE-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
; APPLE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
; APPLE-NEXT: br label %[[LOOP_EPIL:.*]]
; APPLE: [[LOOP_EPIL]]:
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL]]
; APPLE-NEXT: store float [[L_EPIL]], ptr [[GEP_DST_EPIL]], align 4
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
; APPLE-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
; APPLE: [[EXIT_EPILOG_LCSSA]]:
; APPLE-NEXT: br label %[[EXIT]]
; APPLE: [[EXIT]]:
; APPLE-NEXT: ret void
;
; OTHER-LABEL: define void @small_load_store_loop(
; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
; OTHER-NEXT: [[ENTRY:.*]]:
; OTHER-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; OTHER-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
; OTHER-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; OTHER-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
; OTHER: [[ENTRY_NEW]]:
; OTHER-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; OTHER-NEXT: br label %[[LOOP:.*]]
; OTHER: [[LOOP]]:
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
; OTHER-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
; OTHER-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
; OTHER-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
; OTHER-NEXT: store float [[L]], ptr [[GEP_DST]], align 4
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; OTHER-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
; OTHER-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
; OTHER-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
; OTHER-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
; OTHER-NEXT: store float [[L_1]], ptr [[GEP_DST_1]], align 4
; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; OTHER-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; OTHER-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; OTHER-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
; OTHER: [[EXIT_UNR_LCSSA]]:
; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
; OTHER: [[LOOP_EPIL_PREHEADER]]:
; OTHER-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
; OTHER-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
; OTHER-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
; OTHER-NEXT: br label %[[LOOP_EPIL:.*]]
; OTHER: [[LOOP_EPIL]]:
; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL_INIT]], [[SCALE]]
; OTHER-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
; OTHER-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL_INIT]]
; OTHER-NEXT: store float [[L_EPIL]], ptr [[GEP_DST_EPIL]], align 4
; OTHER-NEXT: br label %[[EXIT]]
; OTHER: [[EXIT]]:
; OTHER-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; The source index is the induction variable times the loop-invariant %scale,
; so the load is strided while the store below is indexed by %iv directly.
%scaled.iv = mul nuw nsw i64 %iv, %scale
%gep.src = getelementptr inbounds float, ptr %src, i64 %scaled.iv
%l = load float, ptr %gep.src, align 4
%gep.dst = getelementptr inbounds float, ptr %dst, i64 %iv
store float %l, ptr %gep.dst, align 4
%iv.next = add nuw nsw i64 %iv, 1
; The exit test sits at the bottom of the loop, so the body runs at least once;
; the trip count %N is a function argument, not a constant.
%ec = icmp eq i64 %iv.next, %N
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Single-block loop with one arithmetic op between the load and the store:
; each iteration loads a float from %src at index %iv * %scale, adds %k to it
; and stores the sum to %dst at index %iv, starting at %iv = 0 and leaving once
; %iv + 1 == %N.
;
; Expected output is the same shape under both the APPLE and the OTHER prefix:
; the body is repeated twice per trip of the main loop and the single possible
; leftover iteration runs in a straight-line epilogue block.
define void @load_op_store_loop(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k) {
; APPLE-LABEL: define void @load_op_store_loop(
; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
; APPLE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; APPLE-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
; APPLE: [[ENTRY_NEW]]:
; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; APPLE-NEXT: br label %[[LOOP:.*]]
; APPLE: [[LOOP]]:
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
; APPLE-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
; APPLE-NEXT: [[O:%.*]] = fadd float [[L]], [[K]]
; APPLE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
; APPLE-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; APPLE-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
; APPLE-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
; APPLE-NEXT: [[O_1:%.*]] = fadd float [[L_1]], [[K]]
; APPLE-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
; APPLE-NEXT: store float [[O_1]], ptr [[GEP_DST_1]], align 4
; APPLE-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; APPLE-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; APPLE-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; APPLE-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
; APPLE: [[EXIT_UNR_LCSSA]]:
; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
; APPLE: [[LOOP_EPIL_PREHEADER]]:
; APPLE-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
; APPLE-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
; APPLE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
; APPLE-NEXT: br label %[[LOOP_EPIL:.*]]
; APPLE: [[LOOP_EPIL]]:
; APPLE-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL_INIT]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
; APPLE-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
; APPLE-NEXT: [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]]
; APPLE-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL_INIT]]
; APPLE-NEXT: store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4
; APPLE-NEXT: br label %[[EXIT]]
; APPLE: [[EXIT]]:
; APPLE-NEXT: ret void
;
; OTHER-LABEL: define void @load_op_store_loop(
; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
; OTHER-NEXT: [[ENTRY:.*]]:
; OTHER-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; OTHER-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
; OTHER-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
; OTHER-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
; OTHER: [[ENTRY_NEW]]:
; OTHER-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; OTHER-NEXT: br label %[[LOOP:.*]]
; OTHER: [[LOOP]]:
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
; OTHER-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
; OTHER-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
; OTHER-NEXT: [[O:%.*]] = fadd float [[L]], [[K]]
; OTHER-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
; OTHER-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; OTHER-NEXT: [[SCALED_IV_1:%.*]] = mul nuw nsw i64 [[IV_NEXT]], [[SCALE]]
; OTHER-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_1]]
; OTHER-NEXT: [[L_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
; OTHER-NEXT: [[O_1:%.*]] = fadd float [[L_1]], [[K]]
; OTHER-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_NEXT]]
; OTHER-NEXT: store float [[O_1]], ptr [[GEP_DST_1]], align 4
; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; OTHER-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
; OTHER-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
; OTHER-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
; OTHER: [[EXIT_UNR_LCSSA]]:
; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
; OTHER: [[LOOP_EPIL_PREHEADER]]:
; OTHER-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
; OTHER-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
; OTHER-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
; OTHER-NEXT: br label %[[LOOP_EPIL:.*]]
; OTHER: [[LOOP_EPIL]]:
; OTHER-NEXT: [[SCALED_IV_EPIL:%.*]] = mul nuw nsw i64 [[IV_EPIL_INIT]], [[SCALE]]
; OTHER-NEXT: [[GEP_SRC_EPIL:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV_EPIL]]
; OTHER-NEXT: [[L_EPIL:%.*]] = load float, ptr [[GEP_SRC_EPIL]], align 4
; OTHER-NEXT: [[O_EPIL:%.*]] = fadd float [[L_EPIL]], [[K]]
; OTHER-NEXT: [[GEP_DST_EPIL:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV_EPIL_INIT]]
; OTHER-NEXT: store float [[O_EPIL]], ptr [[GEP_DST_EPIL]], align 4
; OTHER-NEXT: br label %[[EXIT]]
; OTHER: [[EXIT]]:
; OTHER-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Strided source index: induction variable times the loop-invariant %scale.
%scaled.iv = mul nuw nsw i64 %iv, %scale
%gep.src = getelementptr inbounds float, ptr %src, i64 %scaled.iv
%l = load float, ptr %gep.src, align 4
; The fadd with the loop-invariant %k is the only thing that distinguishes this
; body from the plain copy loop in @small_load_store_loop.
%o = fadd float %l, %k
%gep.dst = getelementptr inbounds float, ptr %dst, i64 %iv
store float %o, ptr %gep.dst, align 4
%iv.next = add nuw nsw i64 %iv, 1
; Bottom-tested exit against the runtime trip count %N.
%ec = icmp eq i64 %iv.next, %N
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Same load / fadd / store computation as @load_op_store_loop, but the loop
; spans three blocks: %loop loads the value, %loopodd negates it when the low
; bit of %iv is set, and %loopcont (the latch) adds %k and stores the result.
;
; Expected output under both the APPLE and the OTHER prefix keeps a single copy
; of the loop body with the original exit test, i.e. the loop is left as is.
define void @load_op_store_loop_multiblock(ptr %src, ptr %dst, i64 %N, i64 %scale, float %k) {
; APPLE-LABEL: define void @load_op_store_loop_multiblock(
; APPLE-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: br label %[[LOOP:.*]]
; APPLE: [[LOOP]]:
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOPCONT:.*]] ]
; APPLE-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
; APPLE-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
; APPLE-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
; APPLE-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
; APPLE-NEXT: [[ODD:%.*]] = icmp eq i64 [[AND]], 1
; APPLE-NEXT: br i1 [[ODD]], label %[[LOOPODD:.*]], label %[[LOOPCONT]]
; APPLE: [[LOOPCONT]]:
; APPLE-NEXT: [[D:%.*]] = phi float [ [[L2:%.*]], %[[LOOPODD]] ], [ [[L]], %[[LOOP]] ]
; APPLE-NEXT: [[O:%.*]] = fadd float [[D]], [[K]]
; APPLE-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
; APPLE-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; APPLE: [[LOOPODD]]:
; APPLE-NEXT: [[L2]] = fneg float [[L]]
; APPLE-NEXT: br label %[[LOOPCONT]]
; APPLE: [[EXIT]]:
; APPLE-NEXT: ret void
;
; OTHER-LABEL: define void @load_op_store_loop_multiblock(
; OTHER-SAME: ptr [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]], i64 [[SCALE:%.*]], float [[K:%.*]]) #[[ATTR0]] {
; OTHER-NEXT: [[ENTRY:.*]]:
; OTHER-NEXT: br label %[[LOOP:.*]]
; OTHER: [[LOOP]]:
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOPCONT:.*]] ]
; OTHER-NEXT: [[SCALED_IV:%.*]] = mul nuw nsw i64 [[IV]], [[SCALE]]
; OTHER-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[SCALED_IV]]
; OTHER-NEXT: [[L:%.*]] = load float, ptr [[GEP_SRC]], align 4
; OTHER-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
; OTHER-NEXT: [[ODD:%.*]] = icmp eq i64 [[AND]], 1
; OTHER-NEXT: br i1 [[ODD]], label %[[LOOPODD:.*]], label %[[LOOPCONT]]
; OTHER: [[LOOPCONT]]:
; OTHER-NEXT: [[D:%.*]] = phi float [ [[L2:%.*]], %[[LOOPODD]] ], [ [[L]], %[[LOOP]] ]
; OTHER-NEXT: [[O:%.*]] = fadd float [[D]], [[K]]
; OTHER-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]]
; OTHER-NEXT: store float [[O]], ptr [[GEP_DST]], align 4
; OTHER-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; OTHER-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; OTHER-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; OTHER: [[LOOPODD]]:
; OTHER-NEXT: [[L2]] = fneg float [[L]]
; OTHER-NEXT: br label %[[LOOPCONT]]
; OTHER: [[EXIT]]:
; OTHER-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loopcont ]
%scaled.iv = mul nuw nsw i64 %iv, %scale
%gep.src = getelementptr inbounds float, ptr %src, i64 %scaled.iv
%l1 = load float, ptr %gep.src, align 4
; Odd values of %iv take the detour through %loopodd; even values go straight
; to the latch block.
%and = and i64 %iv, 1
%odd = icmp eq i64 %and, 1
br i1 %odd, label %loopodd, label %loopcont
loopcont:
; %d is the loaded value, negated if control arrived via %loopodd.
%d = phi float [ %l2, %loopodd ], [ %l1, %loop]
%o = fadd float %d, %k
%gep.dst = getelementptr inbounds float, ptr %dst, i64 %iv
store float %o, ptr %gep.dst, align 4
%iv.next = add nuw nsw i64 %iv, 1
; Bottom-tested exit against the runtime trip count %N.
%ec = icmp eq i64 %iv.next, %N
br i1 %ec, label %exit, label %loop
loopodd:
%l2 = fneg float %l1
br label %loopcont
exit:
ret void
}
; External constant arrays referenced by the expected output of
; @early_continue_dep_on_load_large: @A is indexed by a zero-extended i8 loaded
; through %p.1, and the sign-extended i8 read from @A indexes both @B and @C.
@A = external constant [9 x i8], align 1
@B = external constant [8 x i32], align 4
@C = external constant [8 x i32], align 4
define void @early_continue_dep_on_load_large(ptr %p.1, ptr %p.2, i64 %N, i32 %x, i32 %width, i32 %t.1, i32 %t.2) {
; APPLE-LABEL: define void @early_continue_dep_on_load_large(
; APPLE-SAME: ptr [[P_1:%.*]], ptr [[P_2:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i32 [[WIDTH:%.*]], i32 [[T_1:%.*]], i32 [[T_2:%.*]]) #[[ATTR0]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; APPLE-NEXT: [[TMP1:%.*]] = add i64 [[N]], -2
; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP0]], 3
; APPLE-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 3
; APPLE-NEXT: br i1 [[TMP2]], label %[[LOOP_HEADER_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
; APPLE: [[ENTRY_NEW]]:
; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP0]], [[XTRAITER]]
; APPLE-NEXT: br label %[[LOOP_HEADER:.*]]
; APPLE: [[LOOP_HEADER]]:
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP_LATCH_3:.*]] ]
; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP_LATCH_3]] ]
; APPLE-NEXT: [[GEP:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV]]
; APPLE-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP]], align 4
; APPLE-NEXT: [[C_1:%.*]] = icmp sgt i32 [[L_1]], [[T_1]]
; APPLE-NEXT: br i1 [[C_1]], label %[[THEN:.*]], label %[[LOOP_LATCH:.*]]
; APPLE: [[THEN]]:
; APPLE-NEXT: [[GEP_4:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 4
; APPLE-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_4]], align 4
; APPLE-NEXT: [[C_2:%.*]] = icmp ugt i8 [[L_2]], 7
; APPLE-NEXT: br i1 [[C_2]], label %[[MERGE:.*]], label %[[ELSE:.*]]
; APPLE: [[ELSE]]:
; APPLE-NEXT: [[CONV_I:%.*]] = zext nneg i8 [[L_2]] to i64
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I]]
; APPLE-NEXT: [[L_3:%.*]] = load i8, ptr [[GEP_A]], align 1
; APPLE-NEXT: [[IDXPROM_I:%.*]] = sext i8 [[L_3]] to i64
; APPLE-NEXT: [[GEP_B:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I]]
; APPLE-NEXT: [[L_4:%.*]] = load i32, ptr [[GEP_B]], align 4
; APPLE-NEXT: [[GEP_C:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I]]
; APPLE-NEXT: [[L_5:%.*]] = load i32, ptr [[GEP_C]], align 4
; APPLE-NEXT: br label %[[MERGE]]
; APPLE: [[MERGE]]:
; APPLE-NEXT: [[MERGE_1:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[L_4]], %[[ELSE]] ]
; APPLE-NEXT: [[MERGE_2:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[L_5]], %[[ELSE]] ]
; APPLE-NEXT: [[ADD14:%.*]] = add nsw i32 [[MERGE_2]], [[X]]
; APPLE-NEXT: [[MUL15:%.*]] = mul nsw i32 [[ADD14]], [[WIDTH]]
; APPLE-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[IV]] to i32
; APPLE-NEXT: [[ADD16:%.*]] = add nsw i32 [[MERGE_1]], [[TMP3]]
; APPLE-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD16]], [[MUL15]]
; APPLE-NEXT: [[IDXPROM18:%.*]] = sext i32 [[ADD17]] to i64
; APPLE-NEXT: [[GEP_P_2:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18]]
; APPLE-NEXT: [[L_6:%.*]] = load i32, ptr [[GEP_P_2]], align 4
; APPLE-NEXT: [[SUB:%.*]] = sub nsw i32 [[X]], [[MERGE_2]]
; APPLE-NEXT: [[MUL21:%.*]] = mul nsw i32 [[SUB]], [[WIDTH]]
; APPLE-NEXT: [[SUB22:%.*]] = sub i32 [[TMP3]], [[MERGE_1]]
; APPLE-NEXT: [[ADD23:%.*]] = add nsw i32 [[SUB22]], [[MUL21]]
; APPLE-NEXT: [[IDXPROM24:%.*]] = sext i32 [[ADD23]] to i64
; APPLE-NEXT: [[GEP_P2_1:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24]]
; APPLE-NEXT: [[L_7:%.*]] = load i32, ptr [[GEP_P2_1]], align 4
; APPLE-NEXT: [[C_3:%.*]] = icmp sgt i32 [[L_1]], [[L_6]]
; APPLE-NEXT: [[C_4:%.*]] = icmp sgt i32 [[L_1]], [[L_7]]
; APPLE-NEXT: [[AND34:%.*]] = and i1 [[C_3]], [[C_4]]
; APPLE-NEXT: br i1 [[AND34]], label %[[STORE_RES:.*]], label %[[LOOP_LATCH]]
; APPLE: [[STORE_RES]]:
; APPLE-NEXT: [[C_5:%.*]] = icmp sgt i32 [[L_1]], [[T_2]]
; APPLE-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 5
; APPLE-NEXT: [[RES:%.*]] = select i1 [[C_5]], i8 1, i8 2
; APPLE-NEXT: store i8 [[RES]], ptr [[GEP_5]], align 1
; APPLE-NEXT: br label %[[LOOP_LATCH]]
; APPLE: [[LOOP_LATCH]]:
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; APPLE-NEXT: [[GEP_1:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_NEXT]]
; APPLE-NEXT: [[L_1_1:%.*]] = load i32, ptr [[GEP_1]], align 4
; APPLE-NEXT: [[C_1_1:%.*]] = icmp sgt i32 [[L_1_1]], [[T_1]]
; APPLE-NEXT: br i1 [[C_1_1]], label %[[THEN_1:.*]], label %[[LOOP_LATCH_1:.*]]
; APPLE: [[THEN_1]]:
; APPLE-NEXT: [[GEP_4_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_1]], i64 4
; APPLE-NEXT: [[L_2_1:%.*]] = load i8, ptr [[GEP_4_1]], align 4
; APPLE-NEXT: [[C_2_1:%.*]] = icmp ugt i8 [[L_2_1]], 7
; APPLE-NEXT: br i1 [[C_2_1]], label %[[MERGE_12:.*]], label %[[ELSE_1:.*]]
; APPLE: [[ELSE_1]]:
; APPLE-NEXT: [[CONV_I_1:%.*]] = zext nneg i8 [[L_2_1]] to i64
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_1]]
; APPLE-NEXT: [[L_3_1:%.*]] = load i8, ptr [[GEP_A_1]], align 1
; APPLE-NEXT: [[IDXPROM_I_1:%.*]] = sext i8 [[L_3_1]] to i64
; APPLE-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I_1]]
; APPLE-NEXT: [[L_4_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
; APPLE-NEXT: [[GEP_C_1:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_1]]
; APPLE-NEXT: [[L_5_1:%.*]] = load i32, ptr [[GEP_C_1]], align 4
; APPLE-NEXT: br label %[[MERGE_12]]
; APPLE: [[MERGE_12]]:
; APPLE-NEXT: [[MERGE_1_1:%.*]] = phi i32 [ 0, %[[THEN_1]] ], [ [[L_4_1]], %[[ELSE_1]] ]
; APPLE-NEXT: [[MERGE_2_1:%.*]] = phi i32 [ 0, %[[THEN_1]] ], [ [[L_5_1]], %[[ELSE_1]] ]
; APPLE-NEXT: [[ADD14_1:%.*]] = add nsw i32 [[MERGE_2_1]], [[X]]
; APPLE-NEXT: [[MUL15_1:%.*]] = mul nsw i32 [[ADD14_1]], [[WIDTH]]
; APPLE-NEXT: [[TMP4:%.*]] = trunc nuw nsw i64 [[IV_NEXT]] to i32
; APPLE-NEXT: [[ADD16_1:%.*]] = add nsw i32 [[MERGE_1_1]], [[TMP4]]
; APPLE-NEXT: [[ADD17_1:%.*]] = add nsw i32 [[ADD16_1]], [[MUL15_1]]
; APPLE-NEXT: [[IDXPROM18_1:%.*]] = sext i32 [[ADD17_1]] to i64
; APPLE-NEXT: [[GEP_P_2_1:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18_1]]
; APPLE-NEXT: [[L_6_1:%.*]] = load i32, ptr [[GEP_P_2_1]], align 4
; APPLE-NEXT: [[SUB_1:%.*]] = sub nsw i32 [[X]], [[MERGE_2_1]]
; APPLE-NEXT: [[MUL21_1:%.*]] = mul nsw i32 [[SUB_1]], [[WIDTH]]
; APPLE-NEXT: [[SUB22_1:%.*]] = sub i32 [[TMP4]], [[MERGE_1_1]]
; APPLE-NEXT: [[ADD23_1:%.*]] = add nsw i32 [[SUB22_1]], [[MUL21_1]]
; APPLE-NEXT: [[IDXPROM24_1:%.*]] = sext i32 [[ADD23_1]] to i64
; APPLE-NEXT: [[GEP_P2_1_1:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24_1]]
; APPLE-NEXT: [[L_7_1:%.*]] = load i32, ptr [[GEP_P2_1_1]], align 4
; APPLE-NEXT: [[C_3_1:%.*]] = icmp sgt i32 [[L_1_1]], [[L_6_1]]
; APPLE-NEXT: [[C_4_1:%.*]] = icmp sgt i32 [[L_1_1]], [[L_7_1]]
; APPLE-NEXT: [[AND34_1:%.*]] = and i1 [[C_3_1]], [[C_4_1]]
; APPLE-NEXT: br i1 [[AND34_1]], label %[[STORE_RES_1:.*]], label %[[LOOP_LATCH_1]]
; APPLE: [[STORE_RES_1]]:
; APPLE-NEXT: [[C_5_1:%.*]] = icmp sgt i32 [[L_1_1]], [[T_2]]
; APPLE-NEXT: [[GEP_5_1:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_1]], i64 5
; APPLE-NEXT: [[RES_1:%.*]] = select i1 [[C_5_1]], i8 1, i8 2
; APPLE-NEXT: store i8 [[RES_1]], ptr [[GEP_5_1]], align 1
; APPLE-NEXT: br label %[[LOOP_LATCH_1]]
; APPLE: [[LOOP_LATCH_1]]:
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; APPLE-NEXT: [[GEP_2:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_NEXT_1]]
; APPLE-NEXT: [[L_1_2:%.*]] = load i32, ptr [[GEP_2]], align 4
; APPLE-NEXT: [[C_1_2:%.*]] = icmp sgt i32 [[L_1_2]], [[T_1]]
; APPLE-NEXT: br i1 [[C_1_2]], label %[[THEN_2:.*]], label %[[LOOP_LATCH_2:.*]]
; APPLE: [[THEN_2]]:
; APPLE-NEXT: [[GEP_4_2:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_2]], i64 4
; APPLE-NEXT: [[L_2_2:%.*]] = load i8, ptr [[GEP_4_2]], align 4
; APPLE-NEXT: [[C_2_2:%.*]] = icmp ugt i8 [[L_2_2]], 7
; APPLE-NEXT: br i1 [[C_2_2]], label %[[MERGE_23:.*]], label %[[ELSE_2:.*]]
; APPLE: [[ELSE_2]]:
; APPLE-NEXT: [[CONV_I_2:%.*]] = zext nneg i8 [[L_2_2]] to i64
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_2]]
; APPLE-NEXT: [[L_3_2:%.*]] = load i8, ptr [[GEP_A_2]], align 1
; APPLE-NEXT: [[IDXPROM_I_2:%.*]] = sext i8 [[L_3_2]] to i64
; APPLE-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I_2]]
; APPLE-NEXT: [[L_4_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
; APPLE-NEXT: [[GEP_C_2:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_2]]
; APPLE-NEXT: [[L_5_2:%.*]] = load i32, ptr [[GEP_C_2]], align 4
; APPLE-NEXT: br label %[[MERGE_23]]
; APPLE: [[MERGE_23]]:
; APPLE-NEXT: [[MERGE_1_2:%.*]] = phi i32 [ 0, %[[THEN_2]] ], [ [[L_4_2]], %[[ELSE_2]] ]
; APPLE-NEXT: [[MERGE_2_2:%.*]] = phi i32 [ 0, %[[THEN_2]] ], [ [[L_5_2]], %[[ELSE_2]] ]
; APPLE-NEXT: [[ADD14_2:%.*]] = add nsw i32 [[MERGE_2_2]], [[X]]
; APPLE-NEXT: [[MUL15_2:%.*]] = mul nsw i32 [[ADD14_2]], [[WIDTH]]
; APPLE-NEXT: [[TMP5:%.*]] = trunc nuw nsw i64 [[IV_NEXT_1]] to i32
; APPLE-NEXT: [[ADD16_2:%.*]] = add nsw i32 [[MERGE_1_2]], [[TMP5]]
; APPLE-NEXT: [[ADD17_2:%.*]] = add nsw i32 [[ADD16_2]], [[MUL15_2]]
; APPLE-NEXT: [[IDXPROM18_2:%.*]] = sext i32 [[ADD17_2]] to i64
; APPLE-NEXT: [[GEP_P_2_2:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18_2]]
; APPLE-NEXT: [[L_6_2:%.*]] = load i32, ptr [[GEP_P_2_2]], align 4
; APPLE-NEXT: [[SUB_2:%.*]] = sub nsw i32 [[X]], [[MERGE_2_2]]
; APPLE-NEXT: [[MUL21_2:%.*]] = mul nsw i32 [[SUB_2]], [[WIDTH]]
; APPLE-NEXT: [[SUB22_2:%.*]] = sub i32 [[TMP5]], [[MERGE_1_2]]
; APPLE-NEXT: [[ADD23_2:%.*]] = add nsw i32 [[SUB22_2]], [[MUL21_2]]
; APPLE-NEXT: [[IDXPROM24_2:%.*]] = sext i32 [[ADD23_2]] to i64
; APPLE-NEXT: [[GEP_P2_1_2:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24_2]]
; APPLE-NEXT: [[L_7_2:%.*]] = load i32, ptr [[GEP_P2_1_2]], align 4
; APPLE-NEXT: [[C_3_2:%.*]] = icmp sgt i32 [[L_1_2]], [[L_6_2]]
; APPLE-NEXT: [[C_4_2:%.*]] = icmp sgt i32 [[L_1_2]], [[L_7_2]]
; APPLE-NEXT: [[AND34_2:%.*]] = and i1 [[C_3_2]], [[C_4_2]]
; APPLE-NEXT: br i1 [[AND34_2]], label %[[STORE_RES_2:.*]], label %[[LOOP_LATCH_2]]
; APPLE: [[STORE_RES_2]]:
; APPLE-NEXT: [[C_5_2:%.*]] = icmp sgt i32 [[L_1_2]], [[T_2]]
; APPLE-NEXT: [[GEP_5_2:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_2]], i64 5
; APPLE-NEXT: [[RES_2:%.*]] = select i1 [[C_5_2]], i8 1, i8 2
; APPLE-NEXT: store i8 [[RES_2]], ptr [[GEP_5_2]], align 1
; APPLE-NEXT: br label %[[LOOP_LATCH_2]]
; APPLE: [[LOOP_LATCH_2]]:
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; APPLE-NEXT: [[GEP_3:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_NEXT_2]]
; APPLE-NEXT: [[L_1_3:%.*]] = load i32, ptr [[GEP_3]], align 4
; APPLE-NEXT: [[C_1_3:%.*]] = icmp sgt i32 [[L_1_3]], [[T_1]]
; APPLE-NEXT: br i1 [[C_1_3]], label %[[THEN_3:.*]], label %[[LOOP_LATCH_3]]
; APPLE: [[THEN_3]]:
; APPLE-NEXT: [[GEP_4_3:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_3]], i64 4
; APPLE-NEXT: [[L_2_3:%.*]] = load i8, ptr [[GEP_4_3]], align 4
; APPLE-NEXT: [[C_2_3:%.*]] = icmp ugt i8 [[L_2_3]], 7
; APPLE-NEXT: br i1 [[C_2_3]], label %[[MERGE_3:.*]], label %[[ELSE_3:.*]]
; APPLE: [[ELSE_3]]:
; APPLE-NEXT: [[CONV_I_3:%.*]] = zext nneg i8 [[L_2_3]] to i64
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_3]]
; APPLE-NEXT: [[L_3_3:%.*]] = load i8, ptr [[GEP_A_3]], align 1
; APPLE-NEXT: [[IDXPROM_I_3:%.*]] = sext i8 [[L_3_3]] to i64
; APPLE-NEXT: [[GEP_B_3:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I_3]]
; APPLE-NEXT: [[L_4_3:%.*]] = load i32, ptr [[GEP_B_3]], align 4
; APPLE-NEXT: [[GEP_C_3:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_3]]
; APPLE-NEXT: [[L_5_3:%.*]] = load i32, ptr [[GEP_C_3]], align 4
; APPLE-NEXT: br label %[[MERGE_3]]
; APPLE: [[MERGE_3]]:
; APPLE-NEXT: [[MERGE_1_3:%.*]] = phi i32 [ 0, %[[THEN_3]] ], [ [[L_4_3]], %[[ELSE_3]] ]
; APPLE-NEXT: [[MERGE_2_3:%.*]] = phi i32 [ 0, %[[THEN_3]] ], [ [[L_5_3]], %[[ELSE_3]] ]
; APPLE-NEXT: [[ADD14_3:%.*]] = add nsw i32 [[MERGE_2_3]], [[X]]
; APPLE-NEXT: [[MUL15_3:%.*]] = mul nsw i32 [[ADD14_3]], [[WIDTH]]
; APPLE-NEXT: [[TMP6:%.*]] = trunc nuw nsw i64 [[IV_NEXT_2]] to i32
; APPLE-NEXT: [[ADD16_3:%.*]] = add nsw i32 [[MERGE_1_3]], [[TMP6]]
; APPLE-NEXT: [[ADD17_3:%.*]] = add nsw i32 [[ADD16_3]], [[MUL15_3]]
; APPLE-NEXT: [[IDXPROM18_3:%.*]] = sext i32 [[ADD17_3]] to i64
; APPLE-NEXT: [[GEP_P_2_3:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18_3]]
; APPLE-NEXT: [[L_6_3:%.*]] = load i32, ptr [[GEP_P_2_3]], align 4
; APPLE-NEXT: [[SUB_3:%.*]] = sub nsw i32 [[X]], [[MERGE_2_3]]
; APPLE-NEXT: [[MUL21_3:%.*]] = mul nsw i32 [[SUB_3]], [[WIDTH]]
; APPLE-NEXT: [[SUB22_3:%.*]] = sub i32 [[TMP6]], [[MERGE_1_3]]
; APPLE-NEXT: [[ADD23_3:%.*]] = add nsw i32 [[SUB22_3]], [[MUL21_3]]
; APPLE-NEXT: [[IDXPROM24_3:%.*]] = sext i32 [[ADD23_3]] to i64
; APPLE-NEXT: [[GEP_P2_1_3:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24_3]]
; APPLE-NEXT: [[L_7_3:%.*]] = load i32, ptr [[GEP_P2_1_3]], align 4
; APPLE-NEXT: [[C_3_3:%.*]] = icmp sgt i32 [[L_1_3]], [[L_6_3]]
; APPLE-NEXT: [[C_4_3:%.*]] = icmp sgt i32 [[L_1_3]], [[L_7_3]]
; APPLE-NEXT: [[AND34_3:%.*]] = and i1 [[C_3_3]], [[C_4_3]]
; APPLE-NEXT: br i1 [[AND34_3]], label %[[STORE_RES_3:.*]], label %[[LOOP_LATCH_3]]
; APPLE: [[STORE_RES_3]]:
; APPLE-NEXT: [[C_5_3:%.*]] = icmp sgt i32 [[L_1_3]], [[T_2]]
; APPLE-NEXT: [[GEP_5_3:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_3]], i64 5
; APPLE-NEXT: [[RES_3:%.*]] = select i1 [[C_5_3]], i8 1, i8 2
; APPLE-NEXT: store i8 [[RES_3]], ptr [[GEP_5_3]], align 1
; APPLE-NEXT: br label %[[LOOP_LATCH_3]]
; APPLE: [[LOOP_LATCH_3]]:
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; APPLE-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
; APPLE-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
; APPLE-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_HEADER]]
; APPLE: [[EXIT_UNR_LCSSA]]:
; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP_LATCH_3]] ]
; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_HEADER_EPIL_PREHEADER]], label %[[EXIT:.*]]
; APPLE: [[LOOP_HEADER_EPIL_PREHEADER]]:
; APPLE-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
; APPLE-NEXT: [[LCMP_MOD1:%.*]] = icmp ne i64 [[XTRAITER]], 0
; APPLE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD1]])
; APPLE-NEXT: br label %[[LOOP_HEADER_EPIL:.*]]
; APPLE: [[LOOP_HEADER_EPIL]]:
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_LATCH_EPIL:.*]] ]
; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_LATCH_EPIL]] ]
; APPLE-NEXT: [[GEP_EPIL:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV_EPIL]]
; APPLE-NEXT: [[L_1_EPIL:%.*]] = load i32, ptr [[GEP_EPIL]], align 4
; APPLE-NEXT: [[C_1_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[T_1]]
; APPLE-NEXT: br i1 [[C_1_EPIL]], label %[[THEN_EPIL:.*]], label %[[LOOP_LATCH_EPIL]]
; APPLE: [[THEN_EPIL]]:
; APPLE-NEXT: [[GEP_4_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL]], i64 4
; APPLE-NEXT: [[L_2_EPIL:%.*]] = load i8, ptr [[GEP_4_EPIL]], align 4
; APPLE-NEXT: [[C_2_EPIL:%.*]] = icmp ugt i8 [[L_2_EPIL]], 7
; APPLE-NEXT: br i1 [[C_2_EPIL]], label %[[MERGE_EPIL:.*]], label %[[ELSE_EPIL:.*]]
; APPLE: [[ELSE_EPIL]]:
; APPLE-NEXT: [[CONV_I_EPIL:%.*]] = zext nneg i8 [[L_2_EPIL]] to i64
; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I_EPIL]]
; APPLE-NEXT: [[L_3_EPIL:%.*]] = load i8, ptr [[GEP_A_EPIL]], align 1
; APPLE-NEXT: [[IDXPROM_I_EPIL:%.*]] = sext i8 [[L_3_EPIL]] to i64
; APPLE-NEXT: [[GEP_B_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I_EPIL]]
; APPLE-NEXT: [[L_4_EPIL:%.*]] = load i32, ptr [[GEP_B_EPIL]], align 4
; APPLE-NEXT: [[GEP_C_EPIL:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I_EPIL]]
; APPLE-NEXT: [[L_5_EPIL:%.*]] = load i32, ptr [[GEP_C_EPIL]], align 4
; APPLE-NEXT: br label %[[MERGE_EPIL]]
; APPLE: [[MERGE_EPIL]]:
; APPLE-NEXT: [[MERGE_1_EPIL:%.*]] = phi i32 [ 0, %[[THEN_EPIL]] ], [ [[L_4_EPIL]], %[[ELSE_EPIL]] ]
; APPLE-NEXT: [[MERGE_2_EPIL:%.*]] = phi i32 [ 0, %[[THEN_EPIL]] ], [ [[L_5_EPIL]], %[[ELSE_EPIL]] ]
; APPLE-NEXT: [[ADD14_EPIL:%.*]] = add nsw i32 [[MERGE_2_EPIL]], [[X]]
; APPLE-NEXT: [[MUL15_EPIL:%.*]] = mul nsw i32 [[ADD14_EPIL]], [[WIDTH]]
; APPLE-NEXT: [[TMP7:%.*]] = trunc nuw nsw i64 [[IV_EPIL]] to i32
; APPLE-NEXT: [[ADD16_EPIL:%.*]] = add nsw i32 [[MERGE_1_EPIL]], [[TMP7]]
; APPLE-NEXT: [[ADD17_EPIL:%.*]] = add nsw i32 [[ADD16_EPIL]], [[MUL15_EPIL]]
; APPLE-NEXT: [[IDXPROM18_EPIL:%.*]] = sext i32 [[ADD17_EPIL]] to i64
; APPLE-NEXT: [[GEP_P_2_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18_EPIL]]
; APPLE-NEXT: [[L_6_EPIL:%.*]] = load i32, ptr [[GEP_P_2_EPIL]], align 4
; APPLE-NEXT: [[SUB_EPIL:%.*]] = sub nsw i32 [[X]], [[MERGE_2_EPIL]]
; APPLE-NEXT: [[MUL21_EPIL:%.*]] = mul nsw i32 [[SUB_EPIL]], [[WIDTH]]
; APPLE-NEXT: [[SUB22_EPIL:%.*]] = sub i32 [[TMP7]], [[MERGE_1_EPIL]]
; APPLE-NEXT: [[ADD23_EPIL:%.*]] = add nsw i32 [[SUB22_EPIL]], [[MUL21_EPIL]]
; APPLE-NEXT: [[IDXPROM24_EPIL:%.*]] = sext i32 [[ADD23_EPIL]] to i64
; APPLE-NEXT: [[GEP_P2_1_EPIL:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24_EPIL]]
; APPLE-NEXT: [[L_7_EPIL:%.*]] = load i32, ptr [[GEP_P2_1_EPIL]], align 4
; APPLE-NEXT: [[C_3_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[L_6_EPIL]]
; APPLE-NEXT: [[C_4_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[L_7_EPIL]]
; APPLE-NEXT: [[AND34_EPIL:%.*]] = and i1 [[C_3_EPIL]], [[C_4_EPIL]]
; APPLE-NEXT: br i1 [[AND34_EPIL]], label %[[STORE_RES_EPIL:.*]], label %[[LOOP_LATCH_EPIL]]
; APPLE: [[STORE_RES_EPIL]]:
; APPLE-NEXT: [[C_5_EPIL:%.*]] = icmp sgt i32 [[L_1_EPIL]], [[T_2]]
; APPLE-NEXT: [[GEP_5_EPIL:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP_EPIL]], i64 5
; APPLE-NEXT: [[RES_EPIL:%.*]] = select i1 [[C_5_EPIL]], i8 1, i8 2
; APPLE-NEXT: store i8 [[RES_EPIL]], ptr [[GEP_5_EPIL]], align 1
; APPLE-NEXT: br label %[[LOOP_LATCH_EPIL]]
; APPLE: [[LOOP_LATCH_EPIL]]:
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
; APPLE-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_HEADER_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
; APPLE: [[EXIT_EPILOG_LCSSA]]:
; APPLE-NEXT: br label %[[EXIT]]
; APPLE: [[EXIT]]:
; APPLE-NEXT: ret void
;
; OTHER-LABEL: define void @early_continue_dep_on_load_large(
; OTHER-SAME: ptr [[P_1:%.*]], ptr [[P_2:%.*]], i64 [[N:%.*]], i32 [[X:%.*]], i32 [[WIDTH:%.*]], i32 [[T_1:%.*]], i32 [[T_2:%.*]]) #[[ATTR0]] {
; OTHER-NEXT: [[ENTRY:.*]]:
; OTHER-NEXT: br label %[[LOOP_HEADER:.*]]
; OTHER: [[LOOP_HEADER]]:
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 1, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; OTHER-NEXT: [[GEP:%.*]] = getelementptr { i32, i8, i8, [2 x i8] }, ptr [[P_1]], i64 [[IV]]
; OTHER-NEXT: [[L_1:%.*]] = load i32, ptr [[GEP]], align 4
; OTHER-NEXT: [[CMP6_NOT:%.*]] = icmp sgt i32 [[L_1]], [[T_1]]
; OTHER-NEXT: br i1 [[CMP6_NOT]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; OTHER: [[THEN]]:
; OTHER-NEXT: [[GEP_4:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 4
; OTHER-NEXT: [[L_2:%.*]] = load i8, ptr [[GEP_4]], align 4
; OTHER-NEXT: [[OR_COND:%.*]] = icmp ugt i8 [[L_2]], 7
; OTHER-NEXT: br i1 [[OR_COND]], label %[[MERGE:.*]], label %[[ELSE:.*]]
; OTHER: [[ELSE]]:
; OTHER-NEXT: [[CONV_I:%.*]] = zext nneg i8 [[L_2]] to i64
; OTHER-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 [[CONV_I]]
; OTHER-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I]], align 1
; OTHER-NEXT: [[IDXPROM_I:%.*]] = sext i8 [[TMP0]] to i64
; OTHER-NEXT: [[ARRAYIDX_I37:%.*]] = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 [[IDXPROM_I]]
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I37]], align 4
; OTHER-NEXT: [[ARRAYIDX_I42:%.*]] = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 [[IDXPROM_I]]
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_I42]], align 4
; OTHER-NEXT: br label %[[MERGE]]
; OTHER: [[MERGE]]:
; OTHER-NEXT: [[RETVAL_0_I3851:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[TMP1]], %[[ELSE]] ]
; OTHER-NEXT: [[RETVAL_0_I43:%.*]] = phi i32 [ 0, %[[THEN]] ], [ [[TMP2]], %[[ELSE]] ]
; OTHER-NEXT: [[ADD14:%.*]] = add nsw i32 [[RETVAL_0_I43]], [[X]]
; OTHER-NEXT: [[MUL15:%.*]] = mul nsw i32 [[ADD14]], [[WIDTH]]
; OTHER-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[IV]] to i32
; OTHER-NEXT: [[ADD16:%.*]] = add nsw i32 [[RETVAL_0_I3851]], [[TMP3]]
; OTHER-NEXT: [[ADD17:%.*]] = add nsw i32 [[ADD16]], [[MUL15]]
; OTHER-NEXT: [[IDXPROM18:%.*]] = sext i32 [[ADD17]] to i64
; OTHER-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM18]]
; OTHER-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4
; OTHER-NEXT: [[SUB:%.*]] = sub nsw i32 [[X]], [[RETVAL_0_I43]]
; OTHER-NEXT: [[MUL21:%.*]] = mul nsw i32 [[SUB]], [[WIDTH]]
; OTHER-NEXT: [[SUB22:%.*]] = sub i32 [[TMP3]], [[RETVAL_0_I3851]]
; OTHER-NEXT: [[ADD23:%.*]] = add nsw i32 [[SUB22]], [[MUL21]]
; OTHER-NEXT: [[IDXPROM24:%.*]] = sext i32 [[ADD23]] to i64
; OTHER-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr [[P_2]], i64 [[IDXPROM24]]
; OTHER-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX25]], align 4
; OTHER-NEXT: [[CMP27:%.*]] = icmp sgt i32 [[L_1]], [[TMP4]]
; OTHER-NEXT: [[CMP28:%.*]] = icmp sgt i32 [[L_1]], [[TMP5]]
; OTHER-NEXT: [[AND34:%.*]] = and i1 [[CMP27]], [[CMP28]]
; OTHER-NEXT: br i1 [[AND34]], label %[[STORE_RES:.*]], label %[[LOOP_LATCH]]
; OTHER: [[STORE_RES]]:
; OTHER-NEXT: [[CMP32:%.*]] = icmp sgt i32 [[L_1]], [[T_2]]
; OTHER-NEXT: [[GEP_5:%.*]] = getelementptr inbounds nuw i8, ptr [[GEP]], i64 5
; OTHER-NEXT: [[RES:%.*]] = select i1 [[CMP32]], i8 1, i8 2
; OTHER-NEXT: store i8 [[RES]], ptr [[GEP_5]], align 1
; OTHER-NEXT: br label %[[LOOP_LATCH]]
; OTHER: [[LOOP_LATCH]]:
; OTHER-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; OTHER-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; OTHER-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
; OTHER: [[EXIT]]:
; OTHER-NEXT: ret void
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 1, %entry ], [ %iv.next, %loop.latch ]
%gep = getelementptr { i32, i8, i8, [2 x i8] }, ptr %p.1, i64 %iv
%l.1 = load i32, ptr %gep, align 4
%c.1 = icmp sgt i32 %l.1, %t.1
br i1 %c.1, label %then, label %loop.latch
then:
%gep.4 = getelementptr inbounds nuw i8, ptr %gep, i64 4
%l.2 = load i8, ptr %gep.4, align 4
%c.2 = icmp ugt i8 %l.2, 7
br i1 %c.2, label %merge, label %else
else:
%conv.i = zext nneg i8 %l.2 to i64
%gep.A = getelementptr inbounds [9 x i8], ptr @A, i64 0, i64 %conv.i
%l.3 = load i8, ptr %gep.A, align 1
%idxprom.i = sext i8 %l.3 to i64
%gep.B = getelementptr inbounds [8 x i32], ptr @B, i64 0, i64 %idxprom.i
%l.4 = load i32, ptr %gep.B, align 4
%gep.C = getelementptr inbounds [8 x i32], ptr @C, i64 0, i64 %idxprom.i
%l.5 = load i32, ptr %gep.C, align 4
br label %merge
merge:
%merge.1 = phi i32 [ 0, %then ], [ %l.4, %else ]
%merge.2 = phi i32 [ 0, %then ], [ %l.5, %else ]
%add14 = add nsw i32 %merge.2, %x
%mul15 = mul nsw i32 %add14, %width
%12 = trunc nuw nsw i64 %iv to i32
%add16 = add nsw i32 %merge.1, %12
%add17 = add nsw i32 %add16, %mul15
%idxprom18 = sext i32 %add17 to i64
%gep.p.2 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %p.2, i64 %idxprom18
%l.6 = load i32, ptr %gep.p.2, align 4
%sub = sub nsw i32 %x, %merge.2
%mul21 = mul nsw i32 %sub, %width
%sub22 = sub i32 %12, %merge.1
%add23 = add nsw i32 %sub22, %mul21
%idxprom24 = sext i32 %add23 to i64
%gep.p2.1 = getelementptr inbounds { i32, i8, i8, [2 x i8] }, ptr %p.2, i64 %idxprom24
%l.7 = load i32, ptr %gep.p2.1, align 4
%c.3 = icmp sgt i32 %l.1, %l.6
%c.4 = icmp sgt i32 %l.1, %l.7
%and34 = and i1 %c.3, %c.4
br i1 %and34, label %store.res, label %loop.latch
store.res:
%c.5 = icmp sgt i32 %l.1, %t.2
%gep.5 = getelementptr inbounds nuw i8, ptr %gep, i64 5
%res = select i1 %c.5, i8 1, i8 2
store i8 %res, ptr %gep.5, align 1
br label %loop.latch
loop.latch:
%iv.next = add nuw nsw i64 %iv, 1
%ec = icmp eq i64 %iv.next, %N
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
; Integer add reduction over a single-block loop with a constant trip count of
; 1024 (%n is not used by the body).
;
; Expected output under the Apple CPU check prefix: the loop is unrolled by 4
; and the reduction is split across four independent phis (RDX, RDX_1, RDX_2,
; RDX_3). Each unrolled add feeds its own phi, the adds carry no nuw/nsw flags,
; and the partial sums are combined by the BIN_RDX adds in the exit block.
;
; Expected output under the other check prefix: the loop is also unrolled by 4,
; but the four adds remain one serial chain through a single reduction phi and
; keep their nuw nsw flags.
define i32 @test_add_reduction_unroll_partial(ptr %a, i64 noundef %n) {
; APPLE-LABEL: define i32 @test_add_reduction_unroll_partial(
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: br label %[[LOOP:.*]]
; APPLE: [[LOOP]]:
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]]
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
; APPLE-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]]
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
; APPLE-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP2]]
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP3]]
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; APPLE-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
; APPLE-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
; APPLE: [[EXIT]]:
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
; APPLE-NEXT: [[BIN_RDX1:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX1]]
; APPLE-NEXT: ret i32 [[BIN_RDX2]]
;
; OTHER-LABEL: define i32 @test_add_reduction_unroll_partial(
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
; OTHER-NEXT: [[ENTRY:.*]]:
; OTHER-NEXT: br label %[[LOOP:.*]]
; OTHER: [[LOOP]]:
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]]
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
; OTHER-NEXT: [[RDX_NEXT_1:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]]
; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_NEXT_1]], [[TMP2]]
; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP3]]
; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; OTHER-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
; OTHER-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
; OTHER: [[EXIT]]:
; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; OTHER-NEXT: ret i32 [[RES]]
;
entry:
br label %loop
loop:
; %iv counts elements from 0; %rdx carries the running sum, starting at 0.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ]
%gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv
; The i32 element is loaded with only 2-byte alignment.
%1 = load i32, ptr %gep.a, align 2
%rdx.next = add nuw nsw i32 %rdx, %1
%iv.next = add nuw nsw i64 %iv, 1
; Constant exit bound: the loop runs exactly 1024 iterations.
%ec = icmp eq i64 %iv.next, 1024
br i1 %ec, label %exit, label %loop
exit:
; Single-entry phi carrying the final sum out of the loop.
%res = phi i32 [ %rdx.next, %loop ]
ret i32 %res
}
; External predicate with no body in this file; the loop below calls it once
; per iteration to decide whether to overwrite the element it just loaded.
declare i1 @cond()
; Integer add reduction over a loop whose body spans several blocks: the
; header loads an element and calls @cond, the conditional `then` block stores
; 0 back to that element, and the latch performs the add. The trip count is the
; constant 1024 (%n is not used by the body).
;
; Expected output is the same for both check prefixes: a single copy of the
; loop body with one reduction phi, i.e. the loop is left un-unrolled and the
; reduction is not split.
; NOTE(review): presumably the call and/or the multi-block shape is what keeps
; the unroller from transforming this loop; confirm against the pass heuristics.
define i32 @test_add_reduction_multi_block(ptr %a, i64 noundef %n) {
; APPLE-LABEL: define i32 @test_add_reduction_multi_block(
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: br label %[[LOOP:.*]]
; APPLE: [[LOOP]]:
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP_LATCH]] ]
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
; APPLE-NEXT: [[C:%.*]] = call i1 @cond()
; APPLE-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; APPLE: [[THEN]]:
; APPLE-NEXT: store i32 0, ptr [[GEP_A]], align 4
; APPLE-NEXT: br label %[[LOOP_LATCH]]
; APPLE: [[LOOP_LATCH]]:
; APPLE-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
; APPLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; APPLE-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; APPLE-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; APPLE: [[EXIT]]:
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ]
; APPLE-NEXT: ret i32 [[RES]]
;
; OTHER-LABEL: define i32 @test_add_reduction_multi_block(
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
; OTHER-NEXT: [[ENTRY:.*]]:
; OTHER-NEXT: br label %[[LOOP:.*]]
; OTHER: [[LOOP]]:
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP_LATCH]] ]
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
; OTHER-NEXT: [[C:%.*]] = call i1 @cond()
; OTHER-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
; OTHER: [[THEN]]:
; OTHER-NEXT: store i32 0, ptr [[GEP_A]], align 4
; OTHER-NEXT: br label %[[LOOP_LATCH]]
; OTHER: [[LOOP_LATCH]]:
; OTHER-NEXT: [[RDX_NEXT]] = add nuw nsw i32 [[RDX]], [[TMP0]]
; OTHER-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; OTHER-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; OTHER-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; OTHER: [[EXIT]]:
; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP_LATCH]] ]
; OTHER-NEXT: ret i32 [[RES]]
;
entry:
br label %loop
loop:
; Both phis take their back-edge value from %loop.latch, not from %loop.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop.latch ]
%gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv
; The element is read before the conditional store below, so the reduction
; always adds the value that was in memory at this point.
%1 = load i32, ptr %gep.a, align 2
%c = call i1 @cond()
br i1 %c, label %then, label %loop.latch
then:
; No explicit alignment is given here; the expected output prints this store
; with align 4.
store i32 0, ptr %gep.a
br label %loop.latch
loop.latch:
%rdx.next = add nuw nsw i32 %rdx, %1
%iv.next = add nuw nsw i64 %iv, 1
; Constant exit bound: the loop runs exactly 1024 iterations.
%ec = icmp eq i64 %iv.next, 1024
br i1 %ec, label %exit, label %loop
exit:
%res = phi i32 [ %rdx.next, %loop.latch ]
ret i32 %res
}
; Single-block loop with a constant trip count of 1024 (%n is not used by the
; body) that carries two reductions over the same loaded element: an add
; reduction (%rdx) and a mul reduction (%rdx.2). The function returns the sum
; of the two final values.
;
; Expected output under the Apple CPU check prefix: the loop is unrolled by 4.
; The add reduction is split across four independent phis (RDX, RDX_1, RDX_21,
; RDX_3) that are combined by the BIN_RDX adds in the exit block, and its adds
; carry no nuw/nsw flags. The mul reduction stays one serial chain through a
; single phi (RDX_2).
;
; Expected output under the other check prefix: the loop is unrolled by 2 and
; both reductions remain serial chains through one phi each.
define i32 @test_add_and_mul_reduction_unroll_partial(ptr %a, i64 noundef %n) {
; APPLE-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial(
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: br label %[[LOOP:.*]]
; APPLE: [[LOOP]]:
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_21:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_3:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
; APPLE-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP0]]
; APPLE-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]]
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
; APPLE-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP1]]
; APPLE-NEXT: [[RDX_2_NEXT_1:%.*]] = mul i32 [[RDX_2_NEXT]], [[TMP1]]
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
; APPLE-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A_2]], align 2
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_21]], [[TMP2]]
; APPLE-NEXT: [[RDX_2_NEXT_2:%.*]] = mul i32 [[RDX_2_NEXT_1]], [[TMP2]]
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_3]], align 2
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP3]]
; APPLE-NEXT: [[RDX_2_NEXT_3]] = mul i32 [[RDX_2_NEXT_2]], [[TMP3]]
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; APPLE-NEXT: [[EC_3:%.*]] = icmp eq i64 [[IV_NEXT_3]], 1024
; APPLE-NEXT: br i1 [[EC_3]], label %[[EXIT:.*]], label %[[LOOP]]
; APPLE: [[EXIT]]:
; APPLE-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; APPLE-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT_3]], %[[LOOP]] ]
; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
; APPLE-NEXT: [[BIN_RDX2:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
; APPLE-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX2]]
; APPLE-NEXT: [[SUM:%.*]] = add i32 [[BIN_RDX3]], [[RES_2]]
; APPLE-NEXT: ret i32 [[SUM]]
;
; OTHER-LABEL: define i32 @test_add_and_mul_reduction_unroll_partial(
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
; OTHER-NEXT: [[ENTRY:.*]]:
; OTHER-NEXT: br label %[[LOOP:.*]]
; OTHER: [[LOOP]]:
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_2_NEXT_1:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
; OTHER-NEXT: [[TMP0:%.*]] = load i32, ptr [[GEP_A]], align 2
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP0]]
; OTHER-NEXT: [[RDX_2_NEXT:%.*]] = mul i32 [[RDX_2]], [[TMP0]]
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
; OTHER-NEXT: [[TMP1:%.*]] = load i32, ptr [[GEP_A_1]], align 2
; OTHER-NEXT: [[RDX_NEXT_1]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP1]]
; OTHER-NEXT: [[RDX_2_NEXT_1]] = mul i32 [[RDX_2_NEXT]], [[TMP1]]
; OTHER-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
; OTHER-NEXT: [[EC_1:%.*]] = icmp eq i64 [[IV_NEXT_1]], 1024
; OTHER-NEXT: br i1 [[EC_1]], label %[[EXIT:.*]], label %[[LOOP]]
; OTHER: [[EXIT]]:
; OTHER-NEXT: [[RES_1:%.*]] = phi i32 [ [[RDX_NEXT_1]], %[[LOOP]] ]
; OTHER-NEXT: [[RES_2:%.*]] = phi i32 [ [[RDX_2_NEXT_1]], %[[LOOP]] ]
; OTHER-NEXT: [[SUM:%.*]] = add i32 [[RES_1]], [[RES_2]]
; OTHER-NEXT: ret i32 [[SUM]]
;
entry:
br label %loop
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; %rdx is the add accumulator and %rdx.2 the mul accumulator; both start at 0.
%rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ]
%rdx.2 = phi i32 [ 0, %entry ], [ %rdx.2.next, %loop ]
%gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv
; One load per iteration feeds both reductions.
%1 = load i32, ptr %gep.a, align 2
; The add carries nuw nsw; the mul carries no wrap flags.
%rdx.next = add nuw nsw i32 %rdx, %1
%rdx.2.next = mul i32 %rdx.2, %1
%iv.next = add nuw nsw i64 %iv, 1
; Constant exit bound: the loop runs exactly 1024 iterations.
%ec = icmp eq i64 %iv.next, 1024
br i1 %ec, label %exit, label %loop
exit:
%res.1 = phi i32 [ %rdx.next, %loop ]
%res.2 = phi i32 [ %rdx.2.next, %loop ]
; The returned value combines the final results of both reductions.
%sum = add i32 %res.1, %res.2
ret i32 %sum
}
define i32 @test_add_reduction_runtime(ptr %a, i64 noundef %n) {
; APPLE-LABEL: define i32 @test_add_reduction_runtime(
; APPLE-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
; APPLE-NEXT: [[ENTRY:.*]]:
; APPLE-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; APPLE-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 3
; APPLE-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
; APPLE-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
; APPLE: [[ENTRY_NEW]]:
; APPLE-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; APPLE-NEXT: br label %[[LOOP:.*]]
; APPLE: [[LOOP]]:
; APPLE-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_1:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_2:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_3:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ]
; APPLE-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
; APPLE-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2
; APPLE-NEXT: [[RDX_NEXT]] = add i32 [[RDX]], [[TMP2]]
; APPLE-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; APPLE-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
; APPLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2
; APPLE-NEXT: [[RDX_NEXT_1]] = add i32 [[RDX_1]], [[TMP3]]
; APPLE-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; APPLE-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
; APPLE-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2
; APPLE-NEXT: [[RDX_NEXT_2]] = add i32 [[RDX_2]], [[TMP4]]
; APPLE-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; APPLE-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
; APPLE-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2
; APPLE-NEXT: [[RDX_NEXT_3]] = add i32 [[RDX_3]], [[TMP5]]
; APPLE-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; APPLE-NEXT: [[NITER_NEXT_3]] = add nuw i64 [[NITER]], 4
; APPLE-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
; APPLE-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
; APPLE: [[EXIT_UNR_LCSSA]]:
; APPLE-NEXT: [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; APPLE-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
; APPLE-NEXT: [[RDX_UNR:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; APPLE-NEXT: [[BIN_RDX:%.*]] = add i32 [[RDX_NEXT_1]], [[RDX_NEXT]]
; APPLE-NEXT: [[BIN_RDX3:%.*]] = add i32 [[RDX_NEXT_2]], [[BIN_RDX]]
; APPLE-NEXT: [[BIN_RDX4:%.*]] = add i32 [[RDX_NEXT_3]], [[BIN_RDX3]]
; APPLE-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; APPLE-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
; APPLE: [[LOOP_EPIL_PREHEADER]]:
; APPLE-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
; APPLE-NEXT: [[RDX_EPIL_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[BIN_RDX4]], %[[EXIT_UNR_LCSSA]] ]
; APPLE-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
; APPLE-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
; APPLE-NEXT: br label %[[LOOP_EPIL:.*]]
; APPLE: [[LOOP_EPIL]]:
; APPLE-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
; APPLE-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
; APPLE-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
; APPLE-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]]
; APPLE-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
; APPLE-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]]
; APPLE-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
; APPLE-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
; APPLE-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; APPLE-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
; APPLE-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP3:![0-9]+]]
; APPLE: [[EXIT_EPILOG_LCSSA]]:
; APPLE-NEXT: [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
; APPLE-NEXT: br label %[[EXIT]]
; APPLE: [[EXIT]]:
; APPLE-NEXT: [[RES:%.*]] = phi i32 [ [[BIN_RDX4]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ]
; APPLE-NEXT: ret i32 [[RES]]
;
; OTHER-LABEL: define i32 @test_add_reduction_runtime(
; OTHER-SAME: ptr [[A:%.*]], i64 noundef [[N:%.*]]) #[[ATTR0]] {
; OTHER-NEXT: [[ENTRY:.*]]:
; OTHER-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
; OTHER-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 3
; OTHER-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 3
; OTHER-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
; OTHER: [[ENTRY_NEW]]:
; OTHER-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
; OTHER-NEXT: br label %[[LOOP:.*]]
; OTHER: [[LOOP]]:
; OTHER-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[LOOP]] ]
; OTHER-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV]]
; OTHER-NEXT: [[TMP2:%.*]] = load i32, ptr [[GEP_A]], align 2
; OTHER-NEXT: [[RDX_NEXT:%.*]] = add nuw nsw i32 [[RDX]], [[TMP2]]
; OTHER-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
; OTHER-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT]]
; OTHER-NEXT: [[TMP3:%.*]] = load i32, ptr [[GEP_A_1]], align 2
; OTHER-NEXT: [[RDX_NEXT_1:%.*]] = add nuw nsw i32 [[RDX_NEXT]], [[TMP3]]
; OTHER-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
; OTHER-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_1]]
; OTHER-NEXT: [[TMP4:%.*]] = load i32, ptr [[GEP_A_2]], align 2
; OTHER-NEXT: [[RDX_NEXT_2:%.*]] = add nuw nsw i32 [[RDX_NEXT_1]], [[TMP4]]
; OTHER-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
; OTHER-NEXT: [[GEP_A_3:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_NEXT_2]]
; OTHER-NEXT: [[TMP5:%.*]] = load i32, ptr [[GEP_A_3]], align 2
; OTHER-NEXT: [[RDX_NEXT_3]] = add nuw nsw i32 [[RDX_NEXT_2]], [[TMP5]]
; OTHER-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
; OTHER-NEXT: [[NITER_NEXT_3]] = add i64 [[NITER]], 4
; OTHER-NEXT: [[NITER_NCMP_3:%.*]] = icmp eq i64 [[NITER_NEXT_3]], [[UNROLL_ITER]]
; OTHER-NEXT: br i1 [[NITER_NCMP_3]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]]
; OTHER: [[EXIT_UNR_LCSSA]]:
; OTHER-NEXT: [[RES_PH:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; OTHER-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_3]], %[[LOOP]] ]
; OTHER-NEXT: [[RDX_UNR:%.*]] = phi i32 [ [[RDX_NEXT_3]], %[[LOOP]] ]
; OTHER-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
; OTHER-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
; OTHER: [[LOOP_EPIL_PREHEADER]]:
; OTHER-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
; OTHER-NEXT: [[RDX_EPIL_INIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_UNR]], %[[EXIT_UNR_LCSSA]] ]
; OTHER-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
; OTHER-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
; OTHER-NEXT: br label %[[LOOP_EPIL:.*]]
; OTHER: [[LOOP_EPIL]]:
; OTHER-NEXT: [[IV_EPIL:%.*]] = phi i64 [ [[IV_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
; OTHER-NEXT: [[RDX_EPIL:%.*]] = phi i32 [ [[RDX_EPIL_INIT]], %[[LOOP_EPIL_PREHEADER]] ], [ [[RDX_NEXT_EPIL:%.*]], %[[LOOP_EPIL]] ]
; OTHER-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], %[[LOOP_EPIL]] ]
; OTHER-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw i32, ptr [[A]], i64 [[IV_EPIL]]
; OTHER-NEXT: [[TMP6:%.*]] = load i32, ptr [[GEP_A_EPIL]], align 2
; OTHER-NEXT: [[RDX_NEXT_EPIL]] = add nuw nsw i32 [[RDX_EPIL]], [[TMP6]]
; OTHER-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1
; OTHER-NEXT: [[EC_EPIL:%.*]] = icmp eq i64 [[IV_NEXT_EPIL]], [[N]]
; OTHER-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; OTHER-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
; OTHER-NEXT: br i1 [[EPIL_ITER_CMP]], label %[[LOOP_EPIL]], label %[[EXIT_EPILOG_LCSSA:.*]], !llvm.loop [[LOOP0:![0-9]+]]
; OTHER: [[EXIT_EPILOG_LCSSA]]:
; OTHER-NEXT: [[RES_PH1:%.*]] = phi i32 [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
; OTHER-NEXT: br label %[[EXIT]]
; OTHER: [[EXIT]]:
; OTHER-NEXT: [[RES:%.*]] = phi i32 [ [[RES_PH]], %[[EXIT_UNR_LCSSA]] ], [ [[RES_PH1]], %[[EXIT_EPILOG_LCSSA]] ]
; OTHER-NEXT: ret i32 [[RES]]
;
; Input IR for the unroller: a single-block loop that accumulates consecutive
; i32 elements of %a into an integer sum until the incremented index equals %n.
entry:
br label %loop
; The loop block is both header and latch: it branches back to itself.
loop:
; %iv is the element index; it starts at 0 and advances by 1 per iteration.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; %rdx is the running sum; it starts at 0.
%rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ]
%gep.a = getelementptr inbounds nuw i32, ptr %a, i64 %iv
; The load is marked 2-byte aligned even though an i32 is 4 bytes wide.
; NOTE(review): presumably deliberate for this test - confirm before changing.
%1 = load i32, ptr %gep.a, align 2
%rdx.next = add nuw nsw i32 %rdx, %1
%iv.next = add nuw nsw i64 %iv, 1
; The exit test runs after the body, so the body executes at least once.
; %n is not a constant here, so the trip count is only known at run time.
%ec = icmp eq i64 %iv.next, %n
br i1 %ec, label %exit, label %loop
exit:
; Single-incoming phi that carries the final sum out of the loop.
%res = phi i32 [ %rdx.next, %loop ]
ret i32 %res
}
;.
; APPLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
; APPLE: [[META1]] = !{!"llvm.loop.unroll.disable"}
; APPLE: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
; APPLE: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
;.
; OTHER: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
; OTHER: [[META1]] = !{!"llvm.loop.unroll.disable"}
;.