; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 6
; RUN: opt -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
; Positive test: the then and else blocks load from the same address
; (%gep.src) with the same type and alignment, under complementary conditions
; (%c and not %c). The expected vector body contains the loads from %src
; unconditionally (no pred.load blocks) and picks between the loaded value and
; the loaded value + 10 with a select on the compare result.
define void @test(ptr %dst, ptr %src, ptr %cond) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND]], i64 400
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 400
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META3:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META3]]
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1
; CHECK-NEXT: [[TMP25:%.*]] = add <2 x i32> [[TMP36]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP36]], <2 x i32> [[TMP25]]
; CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP37]], align 4, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP38]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
; A single GEP of %src is shared by the loads in both branches.
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
%c = icmp ule i32 %l.c, 11
br i1 %c, label %then, label %else
then:
; Reached when %c is true.
%l.src = load i32, ptr %gep.src, align 4
br label %loop.latch
else:
; Reached when %c is false; same address, type and alignment as %l.src.
%l.src.2 = load i32, ptr %gep.src, align 4
%add = add i32 %l.src.2, 10
br label %loop.latch
loop.latch:
%merge = phi i32 [ %l.src, %then ], [ %add, %else ]
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
; The loop runs for exactly 100 iterations.
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Negative test: Different addresses - should NOT hoist
; The then block loads from %src1 and the else block loads from %src2, so the
; two conditional loads do not share an address. The expected vector body keeps
; them as predicated scalar loads in separate pred.load.if blocks.
define void @different_addresses(ptr %dst, ptr %src1, ptr %src2, ptr %cond) {
; CHECK-LABEL: define void @different_addresses(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC1:%.*]], ptr [[SRC2:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[SRC15:%.*]] = ptrtoint ptr [[SRC1]] to i64
; CHECK-NEXT: [[SRC23:%.*]] = ptrtoint ptr [[SRC2]] to i64
; CHECK-NEXT: [[COND2:%.*]] = ptrtoint ptr [[COND]] to i64
; CHECK-NEXT: [[DST1:%.*]] = ptrtoint ptr [[DST]] to i64
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DST1]], [[COND2]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = icmp ult i64 [[TMP0]], 8
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DST1]], [[SRC23]]
; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = icmp ult i64 [[TMP1]], 8
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DST1]], [[SRC15]]
; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = icmp ult i64 [[TMP2]], 8
; CHECK-NEXT: [[CONFLICT_RDX10:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT9]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX10]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE13:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP16]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true)
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP15]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP17:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP19]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
; CHECK-NEXT: br i1 [[TMP18]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]]
; CHECK: [[PRED_LOAD_IF8]]:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i32> [[TMP17]], i32 [[TMP29]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]]
; CHECK: [[PRED_LOAD_CONTINUE9]]:
; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x i32> [ [[TMP17]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP21]], %[[PRED_LOAD_IF8]] ]
; CHECK-NEXT: [[TMP23:%.*]] = add <2 x i32> [[TMP22]], splat (i32 10)
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11:.*]]
; CHECK: [[PRED_LOAD_IF10]]:
; CHECK-NEXT: [[TMP49:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP49]], align 4
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]]
; CHECK: [[PRED_LOAD_CONTINUE11]]:
; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP30]], %[[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13]]
; CHECK: [[PRED_LOAD_IF12]]:
; CHECK-NEXT: [[TMP52:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP52]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> [[TMP32]], i32 [[TMP26]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
; CHECK: [[PRED_LOAD_CONTINUE13]]:
; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP32]], %[[PRED_LOAD_CONTINUE11]] ], [ [[TMP27]], %[[PRED_LOAD_IF12]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP33]], <2 x i32> [[TMP23]]
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP34]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
; Two distinct base pointers indexed by the same induction variable.
%gep.src.1 = getelementptr inbounds i32, ptr %src1, i32 %iv
%gep.src.2 = getelementptr inbounds i32, ptr %src2, i32 %iv
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
%c = icmp ule i32 %l.c, 11
br i1 %c, label %then, label %else
then:
; Reached when %c is true; loads from %src1.
%l.src = load i32, ptr %gep.src.1, align 4
br label %loop.latch
else:
; Reached when %c is false; loads from %src2, a different base than %l.src.
%l.src.2 = load i32, ptr %gep.src.2, align 4
%add = add i32 %l.src.2, 10
br label %loop.latch
loop.latch:
%merge = phi i32 [ %l.src, %then ], [ %add, %else ]
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Negative test: Non-complementary masks - should NOT hoist
; %l.src is loaded when %c1 is true; %l.src.2 is loaded when %c1 is false and
; %c2 is true. When both are false nothing is loaded and 0 is stored, so the
; two load conditions together do not cover every iteration. The expected
; vector body keeps the loads as predicated scalar loads.
define void @non_complementary_masks(ptr %dst, ptr %src, ptr %cond1, ptr %cond2) {
; CHECK-LABEL: define void @non_complementary_masks(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND1:%.*]], ptr [[COND2:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND1]], i64 400
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[COND2]], i64 400
; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[SRC]], i64 400
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND1]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[COND2]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]]
; CHECK-NEXT: [[BOUND07:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP3]]
; CHECK-NEXT: [[BOUND18:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT9:%.*]] = and i1 [[BOUND07]], [[BOUND18]]
; CHECK-NEXT: [[CONFLICT_RDX10:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT9]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX10]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE17:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP8]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x ptr> [[TMP10]], ptr [[TMP9]], i32 1
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[COND1]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[COND2]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP24]], align 4, !alias.scope [[META14:![0-9]+]]
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x i32>, ptr [[TMP25]], align 4, !alias.scope [[META17:![0-9]+]]
; CHECK-NEXT: [[TMP37:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP38:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD11]], splat (i32 20)
; CHECK-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[TMP37]], splat (i1 true)
; CHECK-NEXT: [[TMP19:%.*]] = select <2 x i1> [[TMP18]], <2 x i1> [[TMP38]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP19]], i32 0
; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META19:![0-9]+]]
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP22]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP19]], i32 1
; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_LOAD_IF12:.*]], label %[[PRED_LOAD_CONTINUE13:.*]]
; CHECK: [[PRED_LOAD_IF12]]:
; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META19]]
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP40]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE13]]
; CHECK: [[PRED_LOAD_CONTINUE13]]:
; CHECK-NEXT: [[TMP27:%.*]] = phi <2 x i32> [ [[TMP23]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP26]], %[[PRED_LOAD_IF12]] ]
; CHECK-NEXT: [[TMP28:%.*]] = add <2 x i32> [[TMP27]], splat (i32 10)
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x i1> [[TMP37]], i32 0
; CHECK-NEXT: br i1 [[TMP33]], label %[[PRED_LOAD_IF14:.*]], label %[[PRED_LOAD_CONTINUE15:.*]]
; CHECK: [[PRED_LOAD_IF14]]:
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP8]], align 4, !alias.scope [[META19]]
; CHECK-NEXT: [[TMP34:%.*]] = insertelement <2 x i32> poison, i32 [[TMP29]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE15]]
; CHECK: [[PRED_LOAD_CONTINUE15]]:
; CHECK-NEXT: [[TMP36:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE13]] ], [ [[TMP34]], %[[PRED_LOAD_IF14]] ]
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP37]], i32 1
; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF16:.*]], label %[[PRED_LOAD_CONTINUE17]]
; CHECK: [[PRED_LOAD_IF16]]:
; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr [[TMP9]], align 4, !alias.scope [[META19]]
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP36]], i32 [[TMP30]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE17]]
; CHECK: [[PRED_LOAD_CONTINUE17]]:
; CHECK-NEXT: [[TMP32:%.*]] = phi <2 x i32> [ [[TMP36]], %[[PRED_LOAD_CONTINUE15]] ], [ [[TMP31]], %[[PRED_LOAD_IF16]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP19]], <2 x i32> [[TMP28]], <2 x i32> zeroinitializer
; CHECK-NEXT: [[PREDPHI18:%.*]] = select <2 x i1> [[TMP37]], <2 x i32> [[TMP32]], <2 x i32> [[PREDPHI]]
; CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI18]], ptr [[TMP41]], align 4, !alias.scope [[META21:![0-9]+]], !noalias [[META23:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
%gep.cond1 = getelementptr inbounds i32, ptr %cond1, i32 %iv
%gep.cond2 = getelementptr inbounds i32, ptr %cond2, i32 %iv
; Both condition values are loaded and compared unconditionally in the header.
%l.c1 = load i32, ptr %gep.cond1
%l.c2 = load i32, ptr %gep.cond2
%c1 = icmp ule i32 %l.c1, 11
%c2 = icmp ule i32 %l.c2, 20
br i1 %c1, label %then, label %else
then:
; Reached when %c1 is true.
%l.src = load i32, ptr %gep.src, align 4
br label %loop.latch
else:
; Reached when %c1 is false; only loads from %src if %c2 is also true.
br i1 %c2, label %else.then, label %loop.latch
else.then:
; Reached when %c1 is false and %c2 is true.
%l.src.2 = load i32, ptr %gep.src, align 4
%add = add i32 %l.src.2, 10
br label %loop.latch
loop.latch:
; The incoming value from %else (both conditions false) is the constant 0.
%merge = phi i32 [ %l.src, %then ], [ %add, %else.then ], [ 0, %else ]
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Negative test: Different access sizes - should NOT hoist
; Both loads use the same pointer but have different types (i8 vs i32)
; The i8 value is zero-extended to i32 before the merge. The expected vector
; body keeps separate predicated scalar i32 and i8 loads.
define void @different_access_sizes(ptr %dst, ptr %src, ptr %cond) {
; CHECK-LABEL: define void @different_access_sizes(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND]], i64 400
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 400
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4, !alias.scope [[META26:![0-9]+]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true)
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0
; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META29:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> poison, i32 [[TMP14]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP15]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP12]], i32 1
; CHECK-NEXT: br i1 [[TMP17]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]]
; CHECK: [[PRED_LOAD_IF6]]:
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META29]]
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP18]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]]
; CHECK: [[PRED_LOAD_CONTINUE7]]:
; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i32> [ [[TMP16]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP19]], %[[PRED_LOAD_IF6]] ]
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP20]], splat (i32 10)
; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]]
; CHECK: [[PRED_LOAD_IF8]]:
; CHECK-NEXT: [[TMP23:%.*]] = load i8, ptr [[TMP6]], align 4, !alias.scope [[META29]]
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <2 x i8> poison, i8 [[TMP23]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]]
; CHECK: [[PRED_LOAD_CONTINUE9]]:
; CHECK-NEXT: [[TMP25:%.*]] = phi <2 x i8> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP24]], %[[PRED_LOAD_IF8]] ]
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]]
; CHECK: [[PRED_LOAD_IF10]]:
; CHECK-NEXT: [[TMP27:%.*]] = load i8, ptr [[TMP7]], align 4, !alias.scope [[META29]]
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i8> [[TMP25]], i8 [[TMP27]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]]
; CHECK: [[PRED_LOAD_CONTINUE11]]:
; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i8> [ [[TMP25]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP28]], %[[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[TMP30:%.*]] = zext <2 x i8> [[TMP29]] to <2 x i32>
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP30]], <2 x i32> [[TMP21]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP31]], align 4, !alias.scope [[META31:![0-9]+]], !noalias [[META33:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP32]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
%c = icmp ule i32 %l.c, 11
br i1 %c, label %then, label %else
then:
; Reached when %c is true; reads a single byte from %gep.src and widens it.
%l.src = load i8, ptr %gep.src, align 4
%ext = zext i8 %l.src to i32
br label %loop.latch
else:
; Reached when %c is false; reads a full i32 from the same pointer.
%l.src.2 = load i32, ptr %gep.src, align 4
%add = add i32 %l.src.2, 10
br label %loop.latch
loop.latch:
%merge = phi i32 [ %ext, %then ], [ %add, %else ]
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Make sure the minimum alignment is used when loads have different alignments.
; The then load has align 2 and the else load has align 4, both from the same
; address. The unconditional scalar loads in the expected vector body use
; align 2.
define void @different_alignments_same_address(ptr %dst, ptr %src, ptr %cond) {
; CHECK-LABEL: define void @different_alignments_same_address(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND]], i64 400
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 400
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META36:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP8]], align 2, !alias.scope [[META39:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP9]], align 2, !alias.scope [[META39]]
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i32 0
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP7]], i32 1
; CHECK-NEXT: [[TMP26:%.*]] = add <2 x i32> [[TMP25]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[TMP25]], <2 x i32> [[TMP26]]
; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP35]], align 4, !alias.scope [[META41:![0-9]+]], !noalias [[META43:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
%c = icmp ule i32 %l.c, 11
br i1 %c, label %then, label %else
then:
; Reached when %c is true; this load carries the smaller alignment (2).
%l.src = load i32, ptr %gep.src, align 2
br label %loop.latch
else:
; Reached when %c is false; same address and type, but align 4.
%l.src.2 = load i32, ptr %gep.src, align 4
%add = add i32 %l.src.2, 10
br label %loop.latch
loop.latch:
%merge = phi i32 [ %l.src, %then ], [ %add, %else ]
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Negative test: Volatile loads - should NOT hoist
; The then load is volatile; the else load of the same address is not. The
; expected output is the original scalar loop with no vector blocks.
define void @volatile_load(ptr %dst, ptr %src, ptr %cond) {
; CHECK-LABEL: define void @volatile_load(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
; CHECK-NEXT: [[GEP_COND:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[IV]]
; CHECK-NEXT: [[L_C:%.*]] = load i32, ptr [[GEP_COND]], align 4
; CHECK-NEXT: [[C:%.*]] = icmp ule i32 [[L_C]], 11
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[ELSE:.*]]
; CHECK: [[THEN]]:
; CHECK-NEXT: [[L_SRC:%.*]] = load volatile i32, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[ELSE]]:
; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[L_SRC_2]], 10
; CHECK-NEXT: br label %[[LOOP_LATCH]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ [[L_SRC]], %[[THEN]] ], [ [[ADD]], %[[ELSE]] ]
; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]]
; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_DST]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 100
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
%c = icmp ule i32 %l.c, 11
br i1 %c, label %then, label %else
then:
; Reached when %c is true; the volatile qualifier is what this test exercises.
%l.src = load volatile i32, ptr %gep.src, align 4
br label %loop.latch
else:
; Reached when %c is false; ordinary (non-volatile) load of the same address.
%l.src.2 = load i32, ptr %gep.src, align 4
%add = add i32 %l.src.2, 10
br label %loop.latch
loop.latch:
%merge = phi i32 [ %l.src, %then ], [ %add, %else ]
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Test hoisting with duplicate GEPs: The same address is computed by different
; GEP instructions in different branches. The hoisting pass should use SCEV to
; recognize they compute the same address and hoist the load.
; The expected vector body loads from %src unconditionally and contains no
; pred.load blocks.
define void @duplicate_gep(ptr %dst, ptr %src, ptr %cond) {
; CHECK-LABEL: define void @duplicate_gep(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND]], i64 400
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 400
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 4, !alias.scope [[META46:![0-9]+]]
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope [[META49:![0-9]+]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP15]], align 4, !alias.scope [[META49]]
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <2 x i32> poison, i32 [[TMP22]], i32 0
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP8]], i32 1
; CHECK-NEXT: [[TMP19:%.*]] = add <2 x i32> [[TMP29]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP7]], <2 x i32> [[TMP29]], <2 x i32> [[TMP19]]
; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP30]], align 4, !alias.scope [[META51:![0-9]+]], !noalias [[META53:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP31:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
%c = icmp ule i32 %l.c, 11
br i1 %c, label %then, label %else
then:
; Reached when %c is true; this branch has its own GEP of %src by %iv.
%gep.src.then = getelementptr inbounds i32, ptr %src, i32 %iv
%l.src = load i32, ptr %gep.src.then, align 4
br label %loop.latch
else:
; Reached when %c is false; a second GEP with the same base and index as
; %gep.src.then.
%gep.src.else= getelementptr inbounds i32, ptr %src, i32 %iv
%l.src.2 = load i32, ptr %gep.src.else, align 4
%add = add i32 %l.src.2, 10
br label %loop.latch
loop.latch:
%merge = phi i32 [ %l.src, %then ], [ %add, %else ]
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Test with non-unit-stride loads: both loads go through an i64-typed GEP, so
; the address advances 8 bytes per iteration while only 4 bytes (i32) are read.
; The hoisting optimization should still work since both loads access the same
; address with the same stride.
define void @non_unit_stride_i64(ptr %dst, ptr %src, ptr %cond) {
; CHECK-LABEL: define void @non_unit_stride_i64(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[COND]], i64 400
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 796
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[COND]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]]
; CHECK-NEXT: br i1 [[CONFLICT_RDX]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP6]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META56:![0-9]+]]
; CHECK-NEXT: [[TMP9:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP6]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i32 [[TMP7]]
; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP12]], align 4, !alias.scope [[META59:![0-9]+]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP17]], align 4, !alias.scope [[META59]]
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <2 x i32> poison, i32 [[TMP24]], i32 0
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x i32> [[TMP25]], i32 [[TMP10]], i32 1
; CHECK-NEXT: [[TMP21:%.*]] = add <2 x i32> [[TMP31]], splat (i32 10)
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x i32> [[TMP31]], <2 x i32> [[TMP21]]
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP6]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP32]], align 4, !alias.scope [[META61:![0-9]+]], !noalias [[META63:![0-9]+]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP33]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP64:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
; Input loop: 100 iterations; %cond[iv] selects between two arms that both
; load from the same %src address and differ only in the "+ 10" on the else
; path. The merged value is stored to %dst[iv].
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
%c = icmp ule i32 %l.c, 11
br i1 %c, label %then, label %else
then:
; The GEP is typed i64 (8-byte step per iteration) but only an i32 is loaded.
%gep.src.then = getelementptr inbounds i64, ptr %src, i32 %iv
%l.src = load i32, ptr %gep.src.then, align 4
br label %loop.latch
else:
; Same address computation and load as in %then, followed by an add.
%gep.src.else= getelementptr inbounds i64, ptr %src, i32 %iv
%l.src.2 = load i32, ptr %gep.src.else, align 4
%add = add i32 %l.src.2, 10
br label %loop.latch
loop.latch:
%merge = phi i32 [ %l.src, %then ], [ %add, %else ]
; The store uses an i32-typed GEP, i.e. a 4-byte step, unlike the loads.
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Test that loads inside masked regions (without individual masks) are
; correctly detected and hoisted when they have complementary predicates.
define void @hoist_loads_in_masked_regions(ptr noalias %dst, ptr noalias %src, ptr %cond) {
; CHECK-LABEL: define void @hoist_loads_in_masked_regions(
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[SRC]], i32 [[INDEX]]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD1]], <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP66:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; Input loop: 100 iterations. %gep.src is computed once in the header and
; loaded twice: conditionally in %then and unconditionally in %loop.latch.
; %dst and %src are noalias, and the CHECK lines contain no vector.memcheck
; block.
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
%c = icmp ule i32 %l.c, 11
br i1 %c, label %then, label %loop.latch
then:
; Load executed only when %c is true.
%l.src = load i32, ptr %gep.src, align 4
br label %loop.latch
loop.latch:
; %merge is the loaded value when coming from %then, otherwise 0.
%merge = phi i32 [ %l.src, %then ], [ 0, %loop ]
; Unconditional load of the same address as %l.src.
%l.src.2 = load i32, ptr %gep.src, align 4
; NOTE(review): %add has no users; the store below writes %merge, not %add.
; Confirm this is intentional.
%add = add i32 %l.src.2, %merge
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Test that when there are 3 or more regions with complementary predicates
; loading from the same address, all loads are hoisted and replaced, not just
; the first pair. This tests the K loop that continues searching after finding
; the initial complementary pair.
define void @hoist_multiple_complementary_loads(ptr noalias %dst, ptr noalias %src, ptr %cond) {
; CHECK-LABEL: define void @hoist_multiple_complementary_loads(
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], ptr [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE10:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP0]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP43]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i1> [[TMP7]], splat (i1 true)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0
; CHECK-NEXT: br i1 [[TMP9]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP64:%.*]] = load i32, ptr [[TMP63]], align 4
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[TMP64]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP12]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1
; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]]
; CHECK: [[PRED_LOAD_IF1]]:
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP70:%.*]] = load i32, ptr [[TMP69]], align 4
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP70]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
; CHECK: [[PRED_LOAD_CONTINUE2]]:
; CHECK-NEXT: [[TMP28:%.*]] = phi <2 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP17]], %[[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP15:%.*]] = add <2 x i32> [[TMP28]], splat (i32 1)
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i32> [[WIDE_LOAD]], splat (i32 32)
; CHECK-NEXT: [[TMP29:%.*]] = xor <2 x i1> [[TMP16]], splat (i1 true)
; CHECK-NEXT: [[TMP32:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP29]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[TMP32]], i32 0
; CHECK-NEXT: br i1 [[TMP19]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]]
; CHECK: [[PRED_LOAD_IF3]]:
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4
; CHECK-NEXT: [[TMP33:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]]
; CHECK: [[PRED_LOAD_CONTINUE4]]:
; CHECK-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP33]], %[[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP32]], i32 1
; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6:.*]]
; CHECK: [[PRED_LOAD_IF5]]:
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP25]], align 4
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP26]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
; CHECK: [[PRED_LOAD_CONTINUE6]]:
; CHECK-NEXT: [[TMP18:%.*]] = phi <2 x i32> [ [[TMP23]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP27]], %[[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP22:%.*]] = mul <2 x i32> [[TMP18]], splat (i32 2)
; CHECK-NEXT: [[TMP30:%.*]] = select <2 x i1> [[TMP7]], <2 x i1> [[TMP16]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x i1> [[TMP30]], i32 0
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_LOAD_IF7:.*]], label %[[PRED_LOAD_CONTINUE8:.*]]
; CHECK: [[PRED_LOAD_IF7]]:
; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP61]], align 4
; CHECK-NEXT: [[TMP38:%.*]] = insertelement <2 x i32> poison, i32 [[TMP34]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE8]]
; CHECK: [[PRED_LOAD_CONTINUE8]]:
; CHECK-NEXT: [[TMP35:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE6]] ], [ [[TMP38]], %[[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x i1> [[TMP30]], i32 1
; CHECK-NEXT: br i1 [[TMP36]], label %[[PRED_LOAD_IF9:.*]], label %[[PRED_LOAD_CONTINUE10]]
; CHECK: [[PRED_LOAD_IF9]]:
; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[TMP1]]
; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP65]], align 4
; CHECK-NEXT: [[TMP44:%.*]] = insertelement <2 x i32> [[TMP35]], i32 [[TMP37]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE10]]
; CHECK: [[PRED_LOAD_CONTINUE10]]:
; CHECK-NEXT: [[TMP45:%.*]] = phi <2 x i32> [ [[TMP35]], %[[PRED_LOAD_CONTINUE8]] ], [ [[TMP44]], %[[PRED_LOAD_IF9]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP32]], <2 x i32> [[TMP22]], <2 x i32> [[TMP15]]
; CHECK-NEXT: [[TMP42:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[TMP45]], <2 x i32> [[PREDPHI]]
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP0]]
; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr [[TMP39]], i64 32
; CHECK-NEXT: store <2 x i32> [[TMP42]], ptr [[TMP40]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP46:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP46]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP67:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
; Input loop: 100 iterations. Exactly one of three regions runs per iteration,
; and each region loads an i32 from the same address (%src + %iv bytes):
;   region1: %c.1 && %c.2    -> value used as is
;   region2: %c.1 && !%c.2   -> value * 2
;   region3: !%c.1           -> value + 1
; NOTE(review): the CHECK lines above keep a predicated load (pred.load.if*)
; for every region instead of a single unconditional load; confirm that this
; is the intended expectation for this input.
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.cond = load i32, ptr %gep.cond, align 4
%c.1 = icmp ne i32 %l.cond, 0
br i1 %c.1, label %check2, label %region3
check2:
%c.2 = icmp ne i32 %l.cond, 32
br i1 %c.2, label %region1, label %region2
region1:
; The GEP is i8-typed, so the address advances 1 byte per iteration while the
; load reads 4 bytes.
%gep.src.8.r1 = getelementptr inbounds i8, ptr %src, i32 %iv
%val1 = load i32, ptr %gep.src.8.r1, align 4
br label %loop.latch
region2:
%gep.src.8.r2 = getelementptr inbounds i8, ptr %src, i32 %iv
%val2 = load i32, ptr %gep.src.8.r2, align 4
%mul = mul i32 %val2, 2
br label %loop.latch
region3:
%gep.src.8.r3 = getelementptr inbounds i8, ptr %src, i32 %iv
%val3 = load i32, ptr %gep.src.8.r3, align 4
%add = add i32 %val3, 1
br label %loop.latch
loop.latch:
%merge = phi i32 [ %val1, %region1 ], [ %mul, %region2 ], [ %add, %region3 ]
; The store address is &dst[iv] plus a constant 32-byte offset.
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
%offset.dst = getelementptr inbounds i8, ptr %gep.dst, i64 32
store i32 %merge, ptr %offset.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}
; Both arms of a branch on the loop-invariant %cond load an i16 through the
; same chain of two GEPs ([11 x i16] indexed by %iv, then +8 bytes). Each arm
; recomputes the whole chain. The merged value is stored to the loop-invariant
; pointer %dst.
define void @hoist_predicated_load_with_chained_geps1(ptr %dst, ptr %src, i1 %cond) {
; CHECK-LABEL: define void @hoist_predicated_load_with_chained_geps1(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 2
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 2210
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i64 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META68:![0-9]+]]
; CHECK-NEXT: store i16 [[TMP4]], ptr [[DST]], align 2, !alias.scope [[META71:![0-9]+]], !noalias [[META68]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP73:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
br i1 %cond, label %then, label %else
then:
; Address = &src[iv] as [11 x i16] (22-byte elements), plus 8 bytes.
%gep1 = getelementptr [11 x i16], ptr %src, i64 %iv
%gep2 = getelementptr i8, ptr %gep1, i64 8
%l.0 = load i16, ptr %gep2, align 2
br label %loop.latch
else:
; Same two-GEP chain as in %then, recomputed with distinct value names.
%gep3 = getelementptr [11 x i16], ptr %src, i64 %iv
%gep4 = getelementptr i8, ptr %gep3, i64 8
%l.1 = load i16, ptr %gep4, align 2
br label %loop.latch
loop.latch:
%merge = phi i16 [ %l.0, %then ], [ %l.1, %else ]
store i16 %merge, ptr %dst, align 2
%iv.next = add i64 %iv, 1
; The exit test compares %iv (not %iv.next) against 100, so the body runs for
; iv = 0..100 inclusive.
%ec = icmp eq i64 %iv, 100
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
; Both arms of a branch on the loop-invariant %cond load an i16 from %gep1 + 8
; bytes. The first GEP (%gep1) is shared in the loop header and only the
; trailing i8 GEP is duplicated per arm. The merged value is stored to the
; loop-invariant pointer %dst.
define void @hoist_predicated_load_with_chained_geps2(ptr %dst, ptr %src, i1 %cond) {
; CHECK-LABEL: define void @hoist_predicated_load_with_chained_geps2(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], i1 [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 2
; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[SRC]], i64 8
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 2210
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[SCEVGEP1]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [11 x i16], ptr [[SRC]], i64 [[TMP2]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP4]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP21]], align 2, !alias.scope [[META75:![0-9]+]]
; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META78:![0-9]+]], !noalias [[META75]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP80:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop.header
loop.header:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
; Shared base address: &src[iv] as [11 x i16] (22-byte elements).
%gep1 = getelementptr [11 x i16], ptr %src, i64 %iv
br i1 %cond, label %then, label %else
then:
%gep2 = getelementptr i8, ptr %gep1, i64 8
%l.0 = load i16, ptr %gep2, align 2
br label %loop.latch
else:
; Same +8 byte offset from %gep1 as in %then.
%gep3 = getelementptr i8, ptr %gep1, i64 8
%l.1 = load i16, ptr %gep3, align 2
br label %loop.latch
loop.latch:
%merge = phi i16 [ %l.0, %then ], [ %l.1, %else ]
store i16 %merge, ptr %dst, align 2
%iv.next = add i64 %iv, 1
; The exit test compares %iv (not %iv.next) against 100, so the body runs for
; iv = 0..100 inclusive.
%ec = icmp eq i64 %iv, 100
br i1 %ec, label %exit, label %loop.header
exit:
ret void
}
; Three mutually exclusive paths (%then, %else.if, %else.else) each load an
; i32 from the same %gep.src, which is computed once in the loop header.
; Exactly one path runs per iteration. The merged value is stored to %dst[iv].
; NOTE(review): the CHECK lines below still contain one predicated load
; (pred.load.if*) per path rather than a single unconditional load; confirm
; this matches the intent expressed by the function name.
define void @hoist_all_three_loads_at_same_address(ptr %dst, ptr %src, ptr noalias %cond) {
; CHECK-LABEL: define void @hoist_all_three_loads_at_same_address(
; CHECK-SAME: ptr [[DST:%.*]], ptr [[SRC:%.*]], ptr noalias [[COND:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 400
; CHECK-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[SRC]], i64 400
; CHECK-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP2]]
; CHECK-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[SRC]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT5]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE11:.*]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP4]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> [[TMP8]], ptr [[TMP7]], i32 1
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[COND]], i32 [[TMP4]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP10]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 11)
; CHECK-NEXT: [[TMP12:%.*]] = icmp ule <2 x i32> [[WIDE_LOAD]], splat (i32 20)
; CHECK-NEXT: [[TMP13:%.*]] = xor <2 x i1> [[TMP11]], splat (i1 true)
; CHECK-NEXT: [[TMP14:%.*]] = xor <2 x i1> [[TMP12]], splat (i1 true)
; CHECK-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP13]], <2 x i1> [[TMP14]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[TMP15]], i32 0
; CHECK-NEXT: br i1 [[TMP16]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; CHECK: [[PRED_LOAD_IF]]:
; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82:![0-9]+]]
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> poison, i32 [[TMP17]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
; CHECK: [[PRED_LOAD_CONTINUE]]:
; CHECK-NEXT: [[TMP19:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP18]], %[[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP15]], i32 1
; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF2:.*]], label %[[PRED_LOAD_CONTINUE3:.*]]
; CHECK: [[PRED_LOAD_IF2]]:
; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]]
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[TMP21]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE3]]
; CHECK: [[PRED_LOAD_CONTINUE3]]:
; CHECK-NEXT: [[TMP23:%.*]] = phi <2 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP22]], %[[PRED_LOAD_IF2]] ]
; CHECK-NEXT: [[TMP24:%.*]] = add <2 x i32> [[TMP23]], splat (i32 10)
; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i1> [[TMP12]], <2 x i1> zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[TMP25]], i32 0
; CHECK-NEXT: br i1 [[TMP26]], label %[[PRED_LOAD_IF4:.*]], label %[[PRED_LOAD_CONTINUE5:.*]]
; CHECK: [[PRED_LOAD_IF4]]:
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82]]
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x i32> poison, i32 [[TMP27]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE5]]
; CHECK: [[PRED_LOAD_CONTINUE5]]:
; CHECK-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE3]] ], [ [[TMP28]], %[[PRED_LOAD_IF4]] ]
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP25]], i32 1
; CHECK-NEXT: br i1 [[TMP30]], label %[[PRED_LOAD_IF6:.*]], label %[[PRED_LOAD_CONTINUE7:.*]]
; CHECK: [[PRED_LOAD_IF6]]:
; CHECK-NEXT: [[TMP31:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]]
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP29]], i32 [[TMP31]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE7]]
; CHECK: [[PRED_LOAD_CONTINUE7]]:
; CHECK-NEXT: [[TMP33:%.*]] = phi <2 x i32> [ [[TMP29]], %[[PRED_LOAD_CONTINUE5]] ], [ [[TMP32]], %[[PRED_LOAD_IF6]] ]
; CHECK-NEXT: [[TMP34:%.*]] = mul <2 x i32> [[TMP33]], splat (i32 2)
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0
; CHECK-NEXT: br i1 [[TMP35]], label %[[PRED_LOAD_IF8:.*]], label %[[PRED_LOAD_CONTINUE9:.*]]
; CHECK: [[PRED_LOAD_IF8]]:
; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope [[META82]]
; CHECK-NEXT: [[TMP37:%.*]] = insertelement <2 x i32> poison, i32 [[TMP36]], i32 0
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE9]]
; CHECK: [[PRED_LOAD_CONTINUE9]]:
; CHECK-NEXT: [[TMP38:%.*]] = phi <2 x i32> [ poison, %[[PRED_LOAD_CONTINUE7]] ], [ [[TMP37]], %[[PRED_LOAD_IF8]] ]
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1
; CHECK-NEXT: br i1 [[TMP39]], label %[[PRED_LOAD_IF10:.*]], label %[[PRED_LOAD_CONTINUE11]]
; CHECK: [[PRED_LOAD_IF10]]:
; CHECK-NEXT: [[TMP40:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope [[META82]]
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <2 x i32> [[TMP38]], i32 [[TMP40]], i32 1
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE11]]
; CHECK: [[PRED_LOAD_CONTINUE11]]:
; CHECK-NEXT: [[TMP42:%.*]] = phi <2 x i32> [ [[TMP38]], %[[PRED_LOAD_CONTINUE9]] ], [ [[TMP41]], %[[PRED_LOAD_IF10]] ]
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[TMP34]], <2 x i32> [[TMP24]]
; CHECK-NEXT: [[PREDPHI16:%.*]] = select <2 x i1> [[TMP11]], <2 x i32> [[TMP42]], <2 x i32> [[PREDPHI]]
; CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[TMP4]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI16]], ptr [[TMP43]], align 4, !alias.scope [[META85:![0-9]+]], !noalias [[META82]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP44:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP44]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP87:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br [[EXIT:label %.*]]
; CHECK: [[SCALAR_PH]]:
;
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
%gep.cond = getelementptr inbounds i32, ptr %cond, i32 %iv
%l.c = load i32, ptr %gep.cond
; Both conditions are computed in the header from the same loaded value.
%c1 = icmp ule i32 %l.c, 11
%c2 = icmp ule i32 %l.c, 20
br i1 %c1, label %then, label %else
then:
; Path taken when %c1 is true: value used as is.
%l.src.then = load i32, ptr %gep.src, align 4
br label %loop.latch
else:
br i1 %c2, label %else.if, label %else.else
else.if:
; Path taken when %c1 is false and %c2 is true: value * 2.
%l.src.else.if = load i32, ptr %gep.src, align 4
%mul = mul i32 %l.src.else.if, 2
br label %loop.latch
else.else:
; Path taken when both %c1 and %c2 are false: value + 10.
%l.src.else.else = load i32, ptr %gep.src, align 4
%add = add i32 %l.src.else.else, 10
br label %loop.latch
loop.latch:
%merge = phi i32 [ %l.src.then, %then ], [ %mul, %else.if ], [ %add, %else.else ]
%gep.dst = getelementptr inbounds i32, ptr %dst, i32 %iv
store i32 %merge, ptr %gep.dst, align 4
%iv.next = add nuw nsw i32 %iv, 1
%ec = icmp eq i32 %iv.next, 100
br i1 %ec, label %exit, label %loop
exit:
ret void
}