| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt < %s -aa-pipeline=basic-aa -passes='loop-mssa(licm)' -S | FileCheck %s |
| |
| @X = global i32 0 ; <i32*> [#uses=1] |
| |
| declare void @foo() |
| |
| declare i32 @llvm.bitreverse.i32(i32) |
| |
| ; test1: LICM must not hoist a potentially trapping instruction that is not |
| ; guaranteed to execute. The sdiv below sits in %IfUnEqual, which is only |
| ; reached when %c is false, so the expected output keeps it in that block. |
| ; The load of @X is already ahead of the loop and stays there. |
| define i32 @test1(i1 %c) { |
| ; CHECK-LABEL: @test1( |
| ; CHECK-NEXT: [[A:%.*]] = load i32, i32* @X, align 4 |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: Loop: |
| ; CHECK-NEXT: call void @foo() |
| ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOPTAIL:%.*]], label [[IFUNEQUAL:%.*]] |
| ; CHECK: IfUnEqual: |
| ; CHECK-NEXT: [[B1:%.*]] = sdiv i32 4, [[A]] |
| ; CHECK-NEXT: br label [[LOOPTAIL]] |
| ; CHECK: LoopTail: |
| ; CHECK-NEXT: [[B:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[B1]], [[IFUNEQUAL]] ] |
| ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[OUT:%.*]] |
| ; CHECK: Out: |
| ; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOPTAIL]] ] |
| ; CHECK-NEXT: [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]] |
| ; CHECK-NEXT: ret i32 [[C]] |
| ; |
| %A = load i32, i32* @X ; <i32> [#uses=2] |
| br label %Loop |
| Loop: ; preds = %LoopTail, %0 |
| call void @foo( ) |
| br i1 %c, label %LoopTail, label %IfUnEqual |
| |
| IfUnEqual: ; preds = %Loop |
| ; Loop-invariant operands, but conditionally executed: must not be hoisted. |
| %B1 = sdiv i32 4, %A ; <i32> [#uses=1] |
| br label %LoopTail |
| |
| LoopTail: ; preds = %IfUnEqual, %Loop |
| %B = phi i32 [ 0, %Loop ], [ %B1, %IfUnEqual ] ; <i32> [#uses=1] |
| br i1 %c, label %Loop, label %Out |
| Out: ; preds = %LoopTail |
| ; %B is used outside the loop, so the expected output adds an LCSSA phi here. |
| %C = sub i32 %A, %B ; <i32> [#uses=1] |
| ret i32 %C |
| } |
| |
| |
| declare void @foo2(i32) nounwind |
| |
| |
| ;; test2: it is ok and desirable to hoist this potentially trapping instruction. |
| ;; The sdiv is the first instruction of the loop header %Loop, so it runs on |
| ;; every entry to the loop; the expected output places it in the entry block, |
| ;; right after the load of @X. |
| define i32 @test2(i1 %c) { |
| ; CHECK-LABEL: @test2( |
| ; CHECK-NEXT: [[A:%.*]] = load i32, i32* @X, align 4 |
| ; CHECK-NEXT: [[B:%.*]] = sdiv i32 4, [[A]] |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: Loop: |
| ; CHECK-NEXT: br label [[LOOP2:%.*]] |
| ; CHECK: loop2: |
| ; CHECK-NEXT: call void @foo2(i32 [[B]]) |
| ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]] |
| ; CHECK: Out: |
| ; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ [[B]], [[LOOP2]] ] |
| ; CHECK-NEXT: [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]] |
| ; CHECK-NEXT: ret i32 [[C]] |
| ; |
| %A = load i32, i32* @X |
| br label %Loop |
| |
| Loop: |
| ;; LICM is expected to hoist this div into the entry block. |
| %B = sdiv i32 4, %A |
| br label %loop2 |
| |
| loop2: |
| ;; The call comes after the div, so it cannot prevent the div from executing. |
| call void @foo2( i32 %B ) |
| br i1 %c, label %Loop, label %Out |
| |
| Out: |
| %C = sub i32 %A, %B |
| ret i32 %C |
| } |
| |
| |
| ; test3: this loop-invariant instruction should be constant folded, not hoisted. |
| ; Both operands of the add are constants, so the expected output contains no |
| ; add at all: the literal 6 is passed to @foo2 and fed into the exit phi. |
| define i32 @test3(i1 %c) { |
| ; CHECK-LABEL: @test3( |
| ; CHECK-NEXT: [[A:%.*]] = load i32, i32* @X, align 4 |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: Loop: |
| ; CHECK-NEXT: call void @foo2(i32 6) |
| ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]] |
| ; CHECK: Out: |
| ; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i32 [ 6, [[LOOP]] ] |
| ; CHECK-NEXT: [[C:%.*]] = sub i32 [[A]], [[B_LCSSA]] |
| ; CHECK-NEXT: ret i32 [[C]] |
| ; |
| %A = load i32, i32* @X ; <i32> [#uses=2] |
| br label %Loop |
| Loop: |
| ; Constant operands only: expected to fold to 6 rather than move. |
| %B = add i32 4, 2 ; <i32> [#uses=2] |
| call void @foo2( i32 %B ) |
| br i1 %c, label %Loop, label %Out |
| Out: ; preds = %Loop |
| %C = sub i32 %A, %B ; <i32> [#uses=1] |
| ret i32 %C |
| } |
| |
| ; test4: the sdiv of the loop-invariant arguments %x and %y follows a call to |
| ; @foo_may_call_exit in the loop body. The expected output leaves the sdiv in |
| ; %for.body, after the call, rather than hoisting it into %entry. |
| ; NOTE(review): presumably because the callee, as its name suggests, may never |
| ; return, so the division is not guaranteed to execute - confirm against LICM. |
| define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp { |
| ; CHECK-LABEL: @test4( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: [[N_01:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: call void @foo_may_call_exit(i32 0) |
| ; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X:%.*]], [[Y:%.*]] |
| ; CHECK-NEXT: [[ADD]] = add nsw i32 [[N_01]], [[DIV]] |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1 |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 10000 |
| ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: [[N_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: ret i32 [[N_0_LCSSA]] |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %entry, %for.body |
| ; %i.02 counts iterations up to 10000; %n.01 accumulates the quotient. |
| %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ] |
| call void @foo_may_call_exit(i32 0) |
| ; Loop-invariant operands, but expected to stay below the call. |
| %div = sdiv i32 %x, %y |
| %add = add nsw i32 %n.01, %div |
| %inc = add nsw i32 %i.02, 1 |
| %cmp = icmp slt i32 %inc, 10000 |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body |
| %n.0.lcssa = phi i32 [ %add, %for.body ] |
| ret i32 %n.0.lcssa |
| } |
| |
| declare void @foo_may_call_exit(i32) |
| |
| ; PR14854 |
| ; test5: aggregate operations on the loop-invariant argument %e. The expected |
| ; output hoists the extractvalue into %entry and sinks the insertvalue, whose |
| ; only use is the return, into the exit block %ifend. |
| define { i32*, i32 } @test5(i32 %i, { i32*, i32 } %e) { |
| ; CHECK-LABEL: @test5( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[OUT:%.*]] = extractvalue { i32*, i32 } [[E:%.*]], 1 |
| ; CHECK-NEXT: br label [[TAILRECURSE:%.*]] |
| ; CHECK: tailrecurse: |
| ; CHECK-NEXT: [[I_TR:%.*]] = phi i32 [ [[I:%.*]], [[ENTRY:%.*]] ], [ [[CMP2:%.*]], [[THEN:%.*]] ] |
| ; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[OUT]], [[I_TR]] |
| ; CHECK-NEXT: br i1 [[CMP1]], label [[THEN]], label [[IFEND:%.*]] |
| ; CHECK: then: |
| ; CHECK-NEXT: call void @foo() |
| ; CHECK-NEXT: [[CMP2]] = add i32 [[I_TR]], 1 |
| ; CHECK-NEXT: br label [[TAILRECURSE]] |
| ; CHECK: ifend: |
| ; CHECK-NEXT: [[D_LE:%.*]] = insertvalue { i32*, i32 } [[E]], i32* null, 0 |
| ; CHECK-NEXT: ret { i32*, i32 } [[D_LE]] |
| ; |
| entry: |
| br label %tailrecurse |
| |
| tailrecurse: ; preds = %then, %entry |
| %i.tr = phi i32 [ %i, %entry ], [ %cmp2, %then ] |
| ; Used by the loop's own exit test below: expected to be hoisted. |
| %out = extractvalue { i32*, i32 } %e, 1 |
| ; Not used inside the loop: expected to be sunk to the exit block. |
| %d = insertvalue { i32*, i32 } %e, i32* null, 0 |
| %cmp1 = icmp sgt i32 %out, %i.tr |
| br i1 %cmp1, label %then, label %ifend |
| |
| then: ; preds = %tailrecurse |
| call void @foo() |
| ; Despite its name, %cmp2 is the incremented induction value, not a compare. |
| %cmp2 = add i32 %i.tr, 1 |
| br label %tailrecurse |
| |
| ifend: ; preds = %tailrecurse |
| ret { i32*, i32 } %d |
| } |
| |
| ; test6: unlike the sdiv in test4, the fneg of the loop-invariant %f is expected |
| ; to be hoisted into %entry even though it follows a call to |
| ; @foo_may_call_exit in the loop body. |
| ; NOTE(review): attribute group #2 is not defined in the visible part of this |
| ; file - confirm whether a definition exists or the reference is stale. |
| define void @test6(float %f) #2 { |
| ; CHECK-LABEL: @test6( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[NEG:%.*]] = fneg float [[F:%.*]] |
| ; CHECK-NEXT: br label [[FOR_BODY:%.*]] |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] |
| ; CHECK-NEXT: call void @foo_may_call_exit(i32 0) |
| ; CHECK-NEXT: call void @use(float [[NEG]]) |
| ; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], 10000 |
| ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] |
| ; CHECK: for.end: |
| ; CHECK-NEXT: ret void |
| ; |
| entry: |
| br label %for.body |
| |
| for.body: ; preds = %for.body, %entry |
| %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] |
| call void @foo_may_call_exit(i32 0) |
| ; Expected to move to %entry; its user, the call to @use, stays in the loop. |
| %neg = fneg float %f |
| call void @use(float %neg) |
| %inc = add nsw i32 %i, 1 |
| %cmp = icmp slt i32 %inc, 10000 |
| br i1 %cmp, label %for.body, label %for.end |
| |
| for.end: ; preds = %for.body |
| ret void |
| } |
| |
| declare void @use(float) |
| |
| ; hoist_bitreverse: the @llvm.bitreverse.i32 call takes only the loop-invariant |
| ; argument %0 and sits in %body, which is guarded by the header's bound test. |
| ; The expected output hoists the call into the entry block, ahead of the loop. |
| define i32 @hoist_bitreverse(i32 %0) { |
| ; CHECK-LABEL: @hoist_bitreverse( |
| ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[TMP0:%.*]]) |
| ; CHECK-NEXT: br label [[HEADER:%.*]] |
| ; CHECK: header: |
| ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[TMP1:%.*]] ], [ [[TMP5:%.*]], [[LATCH:%.*]] ] |
| ; CHECK-NEXT: [[TMP3:%.*]] = phi i32 [ 0, [[TMP1]] ], [ [[TMP6:%.*]], [[LATCH]] ] |
| ; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1024 |
| ; CHECK-NEXT: br i1 [[TMP4]], label [[BODY:%.*]], label [[RETURN:%.*]] |
| ; CHECK: body: |
| ; CHECK-NEXT: [[TMP5]] = add i32 [[SUM]], [[TMP2]] |
| ; CHECK-NEXT: br label [[LATCH]] |
| ; CHECK: latch: |
| ; CHECK-NEXT: [[TMP6]] = add nsw i32 [[TMP3]], 1 |
| ; CHECK-NEXT: br label [[HEADER]] |
| ; CHECK: return: |
| ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[HEADER]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_LCSSA]] |
| ; |
| ; The entry block is unnamed; the phis below refer to it as %1. |
| br label %header |
| |
| header: |
| ; %sum is the running total, %2 the induction variable bounded by 1024. |
| %sum = phi i32 [ 0, %1 ], [ %5, %latch ] |
| %2 = phi i32 [ 0, %1 ], [ %6, %latch ] |
| %3 = icmp slt i32 %2, 1024 |
| br i1 %3, label %body, label %return |
| |
| body: |
| ; Invariant intrinsic call: expected to be hoisted out of the loop. |
| %4 = call i32 @llvm.bitreverse.i32(i32 %0) |
| %5 = add i32 %sum, %4 |
| br label %latch |
| |
| latch: |
| %6 = add nsw i32 %2, 1 |
| br label %header |
| |
| return: |
| ret i32 %sum |
| } |
| |
| ; test_volatile: a volatile load can neither be sunk nor hoisted. The expected |
| ; output keeps the load in %Loop and only adds an LCSSA phi for its result in |
| ; %Out. |
| define i32 @test_volatile(i1 %c) { |
| ; CHECK-LABEL: @test_volatile( |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: Loop: |
| ; CHECK-NEXT: [[A:%.*]] = load volatile i32, i32* @X, align 4 |
| ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP]], label [[OUT:%.*]] |
| ; CHECK: Out: |
| ; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i32 [ [[A]], [[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[A_LCSSA]] |
| ; |
| br label %Loop |
| |
| Loop: |
| ; Same address every iteration, but volatile: must stay exactly here. |
| %A = load volatile i32, i32* @X |
| br i1 %c, label %Loop, label %Out |
| |
| Out: |
| ret i32 %A |
| } |
| |
| |
| declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture) nounwind readonly |
| declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture) nounwind |
| declare void @escaping.invariant.start({}*) nounwind |
| ; test_fence: invariant.start dominates the load, and in this scope, the |
| ; load is invariant. So, we can hoist the `addrld` load out of the loop. |
| ; The expected output places the load in %entry, right after invariant.start. |
| ; Despite its name, %volatile is read with an unordered atomic load, not a |
| ; volatile one. |
| define i32 @test_fence(i8* %addr, i32 %n, i8* %volatile) { |
| ; CHECK-LABEL: @test_fence( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[ADDR:%.*]], i64 8 |
| ; CHECK-NEXT: [[ADDR_I:%.*]] = bitcast i8* [[GEP]] to i32* |
| ; CHECK-NEXT: store atomic i32 5, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: [[INVST:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 4, i8* [[GEP]]) |
| ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, i8* [[VOLATILE:%.*]] unordered, align 8 |
| ; CHECK-NEXT: fence acquire |
| ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0 |
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]] |
| ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]] |
| ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]] |
| ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]] |
| ; CHECK: loopexit: |
| ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_LCSSA]] |
| ; |
| entry: |
| %gep = getelementptr inbounds i8, i8* %addr, i64 8 |
| %addr.i = bitcast i8* %gep to i32 * |
| store atomic i32 5, i32 * %addr.i unordered, align 8 |
| fence release |
| ; Marks the 4 bytes at %gep, i.e. the i32 just stored, as invariant. |
| %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) |
| br label %loop |
| |
| loop: |
| %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] |
| %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] |
| %volload = load atomic i8, i8* %volatile unordered, align 8 |
| fence acquire |
| %volchk = icmp eq i8 %volload, 0 |
| ; Reads the invariant location: expected to be hoisted into %entry. |
| %addrld = load atomic i32, i32* %addr.i unordered, align 8 |
| %sel = select i1 %volchk, i32 0, i32 %addrld |
| %sum.next = add i32 %sel, %sum |
| %indvar.next = add i32 %indvar, 1 |
| %cond = icmp slt i32 %indvar.next, %n |
| br i1 %cond, label %loop, label %loopexit |
| |
| loopexit: |
| ret i32 %sum |
| } |
| |
| |
| |
| ; test_fence1: same as test_fence, but the load is no longer invariant: an |
| ; invariant.end closes the region right after invariant.start. We cannot hoist |
| ; the `addrld` load out of the loop, and the expected output keeps it there. |
| define i32 @test_fence1(i8* %addr, i32 %n, i8* %volatile) { |
| ; CHECK-LABEL: @test_fence1( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[ADDR:%.*]], i64 8 |
| ; CHECK-NEXT: [[ADDR_I:%.*]] = bitcast i8* [[GEP]] to i32* |
| ; CHECK-NEXT: store atomic i32 5, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: [[INVST:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 4, i8* [[GEP]]) |
| ; CHECK-NEXT: call void @llvm.invariant.end.p0i8({}* [[INVST]], i64 4, i8* [[GEP]]) |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, i8* [[VOLATILE:%.*]] unordered, align 8 |
| ; CHECK-NEXT: fence acquire |
| ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0 |
| ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]] |
| ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]] |
| ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]] |
| ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]] |
| ; CHECK: loopexit: |
| ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_LCSSA]] |
| ; |
| entry: |
| %gep = getelementptr inbounds i8, i8* %addr, i64 8 |
| %addr.i = bitcast i8* %gep to i32 * |
| store atomic i32 5, i32 * %addr.i unordered, align 8 |
| fence release |
| %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) |
| ; Ends the invariant region before the loop is ever entered. |
| call void @llvm.invariant.end.p0i8({}* %invst, i64 4, i8* %gep) |
| br label %loop |
| |
| loop: |
| %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] |
| %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] |
| %volload = load atomic i8, i8* %volatile unordered, align 8 |
| fence acquire |
| %volchk = icmp eq i8 %volload, 0 |
| ; No longer covered by an open invariant region: expected to stay in the loop. |
| %addrld = load atomic i32, i32* %addr.i unordered, align 8 |
| %sel = select i1 %volchk, i32 0, i32 %addrld |
| %sum.next = add i32 %sel, %sum |
| %indvar.next = add i32 %indvar, 1 |
| %cond = icmp slt i32 %indvar.next, %n |
| br i1 %cond, label %loop, label %loopexit |
| |
| loopexit: |
| ret i32 %sum |
| } |
| |
| ; test_fence2: same as test_fence1, but instead of invariant.end, the result of |
| ; invariant.start escapes through a call to @escaping.invariant.start. We |
| ; cannot hoist the load; the expected output keeps it in %loop. |
| define i32 @test_fence2(i8* %addr, i32 %n, i8* %volatile) { |
| ; CHECK-LABEL: @test_fence2( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[ADDR:%.*]], i64 8 |
| ; CHECK-NEXT: [[ADDR_I:%.*]] = bitcast i8* [[GEP]] to i32* |
| ; CHECK-NEXT: store atomic i32 5, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: [[INVST:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 4, i8* [[GEP]]) |
| ; CHECK-NEXT: call void @escaping.invariant.start({}* [[INVST]]) |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, i8* [[VOLATILE:%.*]] unordered, align 8 |
| ; CHECK-NEXT: fence acquire |
| ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0 |
| ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]] |
| ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]] |
| ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]] |
| ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]] |
| ; CHECK: loopexit: |
| ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_LCSSA]] |
| ; |
| entry: |
| %gep = getelementptr inbounds i8, i8* %addr, i64 8 |
| %addr.i = bitcast i8* %gep to i32 * |
| store atomic i32 5, i32 * %addr.i unordered, align 8 |
| fence release |
| %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) |
| ; The invariant.start token is handed to an opaque callee here. |
| call void @escaping.invariant.start({}* %invst) |
| br label %loop |
| |
| loop: |
| %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] |
| %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] |
| %volload = load atomic i8, i8* %volatile unordered, align 8 |
| fence acquire |
| %volchk = icmp eq i8 %volload, 0 |
| ; Expected to stay in the loop because the token escaped above. |
| %addrld = load atomic i32, i32* %addr.i unordered, align 8 |
| %sel = select i1 %volchk, i32 0, i32 %addrld |
| %sum.next = add i32 %sel, %sum |
| %indvar.next = add i32 %indvar, 1 |
| %cond = icmp slt i32 %indvar.next, %n |
| br i1 %cond, label %loop, label %loopexit |
| |
| loopexit: |
| ret i32 %sum |
| } |
| |
| ; test_fence3: |
| ; FIXME: invariant.start dominates the load, and in this scope, the |
| ; load is invariant. So, we could hoist the `addrld` load out of the loop. |
| ; Here the load operand %addr.i is bitcast to i8* before being passed to |
| ; invariant.start, and the expected output currently leaves the load in %loop. |
| define i32 @test_fence3(i32* %addr, i32 %n, i8* %volatile) { |
| ; CHECK-LABEL: @test_fence3( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[ADDR_I:%.*]] = getelementptr inbounds i32, i32* [[ADDR:%.*]], i64 8 |
| ; CHECK-NEXT: [[GEP:%.*]] = bitcast i32* [[ADDR_I]] to i8* |
| ; CHECK-NEXT: store atomic i32 5, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: [[INVST:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 4, i8* [[GEP]]) |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, i8* [[VOLATILE:%.*]] unordered, align 8 |
| ; CHECK-NEXT: fence acquire |
| ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0 |
| ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]] |
| ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]] |
| ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]] |
| ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]] |
| ; CHECK: loopexit: |
| ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_LCSSA]] |
| ; |
| entry: |
| ; Reverse of test_fence: the i32* is computed first, then cast to i8*. |
| %addr.i = getelementptr inbounds i32, i32* %addr, i64 8 |
| %gep = bitcast i32* %addr.i to i8 * |
| store atomic i32 5, i32 * %addr.i unordered, align 8 |
| fence release |
| %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) |
| br label %loop |
| |
| loop: |
| %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] |
| %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] |
| %volload = load atomic i8, i8* %volatile unordered, align 8 |
| fence acquire |
| %volchk = icmp eq i8 %volload, 0 |
| ; Loads through %addr.i, not through the %gep given to invariant.start. |
| %addrld = load atomic i32, i32* %addr.i unordered, align 8 |
| %sel = select i1 %volchk, i32 0, i32 %addrld |
| %sum.next = add i32 %sel, %sum |
| %indvar.next = add i32 %indvar, 1 |
| %cond = icmp slt i32 %indvar.next, %n |
| br i1 %cond, label %loop, label %loopexit |
| |
| loopexit: |
| ret i32 %sum |
| } |
| |
| ; test_fence4: the store to %addr.i, the release fence and invariant.start are |
| ; all inside the loop and run on every iteration. We should not hoist the |
| ; `addrld` load out of the loop; the expected output keeps it in %loop. |
| define i32 @test_fence4(i32* %addr, i32 %n, i8* %volatile) { |
| ; CHECK-LABEL: @test_fence4( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[ADDR_I:%.*]] = getelementptr inbounds i32, i32* [[ADDR:%.*]], i64 8 |
| ; CHECK-NEXT: [[GEP:%.*]] = bitcast i32* [[ADDR_I]] to i8* |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: store atomic i32 5, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: [[INVST:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 4, i8* [[GEP]]) |
| ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, i8* [[VOLATILE:%.*]] unordered, align 8 |
| ; CHECK-NEXT: fence acquire |
| ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0 |
| ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]] |
| ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]] |
| ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]] |
| ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]] |
| ; CHECK: loopexit: |
| ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_LCSSA]] |
| ; |
| entry: |
| ; Only the address computation lives outside the loop in this variant. |
| %addr.i = getelementptr inbounds i32, i32* %addr, i64 8 |
| %gep = bitcast i32* %addr.i to i8 * |
| br label %loop |
| |
| loop: |
| %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] |
| %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] |
| ; The location is written inside the loop, before invariant.start. |
| store atomic i32 5, i32 * %addr.i unordered, align 8 |
| fence release |
| %invst = call {}* @llvm.invariant.start.p0i8(i64 4, i8* %gep) |
| %volload = load atomic i8, i8* %volatile unordered, align 8 |
| fence acquire |
| %volchk = icmp eq i8 %volload, 0 |
| %addrld = load atomic i32, i32* %addr.i unordered, align 8 |
| %sel = select i1 %volchk, i32 0, i32 %addrld |
| %sum.next = add i32 %sel, %sum |
| %indvar.next = add i32 %indvar, 1 |
| %cond = icmp slt i32 %indvar.next, %n |
| br i1 %cond, label %loop, label %loopexit |
| |
| loopexit: |
| ret i32 %sum |
| } |
| |
| ; test_fence5: we can't hoist the invariant load out of the loop because |
| ; the marker is given a variable size (-1). Apart from that size argument the |
| ; IR matches test_fence; the expected output keeps `addrld` in %loop. |
| define i32 @test_fence5(i8* %addr, i32 %n, i8* %volatile) { |
| ; CHECK-LABEL: @test_fence5( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, i8* [[ADDR:%.*]], i64 8 |
| ; CHECK-NEXT: [[ADDR_I:%.*]] = bitcast i8* [[GEP]] to i32* |
| ; CHECK-NEXT: store atomic i32 5, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: fence release |
| ; CHECK-NEXT: [[INVST:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 -1, i8* [[GEP]]) |
| ; CHECK-NEXT: br label [[LOOP:%.*]] |
| ; CHECK: loop: |
| ; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ [[SUM_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[VOLLOAD:%.*]] = load atomic i8, i8* [[VOLATILE:%.*]] unordered, align 8 |
| ; CHECK-NEXT: fence acquire |
| ; CHECK-NEXT: [[VOLCHK:%.*]] = icmp eq i8 [[VOLLOAD]], 0 |
| ; CHECK-NEXT: [[ADDRLD:%.*]] = load atomic i32, i32* [[ADDR_I]] unordered, align 8 |
| ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[VOLCHK]], i32 0, i32 [[ADDRLD]] |
| ; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SEL]], [[SUM]] |
| ; CHECK-NEXT: [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1 |
| ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[INDVAR_NEXT]], [[N:%.*]] |
| ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[LOOPEXIT:%.*]] |
| ; CHECK: loopexit: |
| ; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[SUM]], [[LOOP]] ] |
| ; CHECK-NEXT: ret i32 [[SUM_LCSSA]] |
| ; |
| entry: |
| %gep = getelementptr inbounds i8, i8* %addr, i64 8 |
| %addr.i = bitcast i8* %gep to i32 * |
| store atomic i32 5, i32 * %addr.i unordered, align 8 |
| fence release |
| ; Size -1 instead of 4: the extent of the invariant region is not known. |
| %invst = call {}* @llvm.invariant.start.p0i8(i64 -1, i8* %gep) |
| br label %loop |
| |
| loop: |
| %indvar = phi i32 [ %indvar.next, %loop ], [ 0, %entry ] |
| %sum = phi i32 [ %sum.next, %loop ], [ 0, %entry ] |
| %volload = load atomic i8, i8* %volatile unordered, align 8 |
| fence acquire |
| %volchk = icmp eq i8 %volload, 0 |
| %addrld = load atomic i32, i32* %addr.i unordered, align 8 |
| %sel = select i1 %volchk, i32 0, i32 %addrld |
| %sum.next = add i32 %sel, %sum |
| %indvar.next = add i32 %indvar, 1 |
| %cond = icmp slt i32 %indvar.next, %n |
| br i1 %cond, label %loop, label %loopexit |
| |
| loopexit: |
| ret i32 %sum |
| } |
| |
| declare void @g(i1) |
| |
| @a = external global i8 |
| |
| ; test_fence6: invariant.start is applied directly to the global @a, which is |
| ; then loaded in the endless loop %f. |
| ; FIXME: Support hoisting invariant loads of globals. The expected output |
| ; currently keeps the load, and the and/icmp that depend on it, inside %f. |
| define void @test_fence6() { |
| ; CHECK-LABEL: @test_fence6( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[I:%.*]] = call {}* @llvm.invariant.start.p0i8(i64 1, i8* @a) |
| ; CHECK-NEXT: br label [[F:%.*]] |
| ; CHECK: f: |
| ; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* @a, align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], 0 |
| ; CHECK-NEXT: [[T:%.*]] = icmp eq i8 [[TMP1]], 0 |
| ; CHECK-NEXT: tail call void @g(i1 [[T]]) |
| ; CHECK-NEXT: br label [[F]] |
| ; |
| entry: |
| %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* @a) |
| br label %f |
| |
| f: |
| ; %f branches only to itself, so the function has no return. |
| %0 = load i8, i8* @a |
| %1 = and i8 %0, 0 |
| %t = icmp eq i8 %1, 0 |
| tail call void @g(i1 %t) |
| br label %f |
| } |