| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 |
| ; RUN: opt -S < %s -p loop-vectorize -force-vector-width=4 | FileCheck %s |
| ; |
| ; Zero-initialized 64-byte arrays. The loops in this file index them with an |
| ; induction variable that runs from 0 up to (but not including) 64. |
| @A = global [64 x i8] zeroinitializer |
| @B = global [64 x i8] zeroinitializer |
| @C = global [64 x i8] zeroinitializer |
| @D = global [64 x i8] zeroinitializer |
| |
| ; Diamond in the loop body where each side has its own early exit. The header |
| ; branches on %l.A < 0 to block.a (compares %l.A with the @B element) or to |
| ; block.b (compares %l.A with the @C element). The two exits carry different |
| ; live-outs: block.a returns the zero-extended @B element, block.b returns %iv. |
| ; The counted exit from the latch returns 0. |
| define i64 @diamond_with_2_early_exits() { |
| ; CHECK-LABEL: define i64 @diamond_with_2_early_exits() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[BLOCK_A:.*]] |
| ; CHECK: [[BLOCK_A]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP9]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP10]] |
| ; CHECK-NEXT: [[CMP_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP11]]) |
| ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[CMP_A]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP10]], i1 false) |
| ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP14]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_1]]: |
| ; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP7]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_0]]: |
| ; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP16]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP15]], %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %branch.cond = icmp slt i8 %l.A, 0 |
| br i1 %branch.cond, label %block.a, label %block.b |
| |
| ; Taken when %l.A is negative; %ext is the live-out of this side's exit. |
| block.a: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %ext = zext i8 %l.B to i64 |
| %cmp.a = icmp eq i8 %l.A, %l.B |
| br i1 %cmp.a, label %loop.end, label %loop.latch |
| |
| ; Taken when %l.A is non-negative; this side's exit returns %iv. |
| block.b: |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %cmp.b = icmp eq i8 %l.A, %l.C |
| br i1 %cmp.b, label %loop.end, label %loop.latch |
| |
| ; Counted exit: leaves the loop once %iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| ; One incoming value per exiting block. |
| loop.end: |
| %retval = phi i64 [ %ext, %block.a ], [ %iv, %block.b ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Three mutually exclusive early exits selected by a two-level compare on |
| ; %l.A: block.a when %l.A < -42, block.b when -42 <= %l.A < 42, and block.c |
| ; otherwise. Each block compares %l.A with the element of @B, @C or @D |
| ; respectively and exits with the constant 1, 2 or 3. The counted exit from |
| ; the latch returns 0. |
| define i64 @three_early_exits() { |
| ; CHECK-LABEL: define i64 @three_early_exits() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[CHECK_B:.*]] ] |
| ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 -42) |
| ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 42) |
| ; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP6]], align 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 |
| ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP8]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]] |
| ; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]] |
| ; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]] |
| ; CHECK-NEXT: [[COND_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]]) |
| ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[COND_A]], label %[[BLOCK_A:.*]], label %[[CHECK_B]] |
| ; CHECK: [[CHECK_B]]: |
| ; CHECK-NEXT: br i1 [[TMP20]], label %[[BLOCK_B:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK: [[BLOCK_B]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[BLOCK_A]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) |
| ; CHECK-NEXT: [[CMP_B:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH:.*]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_2]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_1]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[BLOCK_B]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %cond.a = icmp slt i8 %l.A, -42 |
| br i1 %cond.a, label %block.a, label %check.b |
| |
| ; Second-level dispatch, reached only when %l.A >= -42. |
| check.b: |
| %cond.b = icmp slt i8 %l.A, 42 |
| br i1 %cond.b, label %block.b, label %block.c |
| |
| block.a: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %cmp.a = icmp eq i8 %l.A, %l.B |
| br i1 %cmp.a, label %loop.end, label %loop.latch |
| |
| block.b: |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %cmp.b = icmp eq i8 %l.A, %l.C |
| br i1 %cmp.b, label %loop.end, label %loop.latch |
| |
| block.c: |
| %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv |
| %l.D = load i8, ptr %gep.D, align 1 |
| %cmp.c = icmp eq i8 %l.A, %l.D |
| br i1 %cmp.c, label %loop.end, label %loop.latch |
| |
| ; Counted exit: leaves the loop once %iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| ; A distinct constant per exiting block identifies which exit was taken. |
| loop.end: |
| %retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Outer diamond on %l.A < 0. The true side (block.a) contains an inner diamond |
| ; on %l.B < 0 whose arms, block.a1 and block.a2, each have an early exit |
| ; (returning 1 and 2) and otherwise rejoin in join.a. The false side (block.b) |
| ; has a third early exit returning 3. The counted exit from the latch |
| ; returns 0. |
| define i64 @nested_diamond_inner_exits() { |
| ; CHECK-LABEL: define i64 @nested_diamond_inner_exits() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[BLOCK_A:.*]] |
| ; CHECK: [[BLOCK_A]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_A2:.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD2]], zeroinitializer |
| ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP9]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP12]] |
| ; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]] |
| ; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP17]] |
| ; CHECK-NEXT: [[INNER_COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]]) |
| ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[INNER_COND]], label %[[BLOCK_A1:.*]], label %[[BLOCK_A2]] |
| ; CHECK: [[BLOCK_A2]]: |
| ; CHECK-NEXT: br i1 [[TMP20]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_LATCH:.*]] |
| ; CHECK: [[BLOCK_A1]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) |
| ; CHECK-NEXT: [[CMP_A1:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[CMP_A1]], label %[[LOOP_END:.*]], label %[[JOIN_A:.*]] |
| ; CHECK: [[JOIN_A]]: |
| ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP22]], label %[[BLOCK_B:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_2]]: |
| ; CHECK-NEXT: br label %[[LOOP_LATCH]] |
| ; CHECK: [[BLOCK_B]]: |
| ; CHECK-NEXT: br label %[[LOOP_LATCH]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: br label %[[LOOP_LATCH]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[BLOCK_B]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %outer.cond = icmp slt i8 %l.A, 0 |
| br i1 %outer.cond, label %block.a, label %block.b |
| |
| ; True side of the outer diamond; it only dispatches into the inner diamond. |
| block.a: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %inner.cond = icmp slt i8 %l.B, 0 |
| br i1 %inner.cond, label %block.a1, label %block.a2 |
| |
| ; Inner true arm: reuses %l.B loaded in block.a for its exit compare. |
| block.a1: |
| %cmp.a1 = icmp eq i8 %l.A, %l.B |
| br i1 %cmp.a1, label %loop.end, label %join.a |
| |
| ; Inner false arm: compares against the @C element instead. |
| block.a2: |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %cmp.a2 = icmp eq i8 %l.A, %l.C |
| br i1 %cmp.a2, label %loop.end, label %join.a |
| |
| ; Merge point of the inner diamond's non-exiting paths. |
| join.a: |
| br label %loop.latch |
| |
| ; False side of the outer diamond. |
| block.b: |
| %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv |
| %l.D = load i8, ptr %gep.D, align 1 |
| %cmp.b = icmp eq i8 %l.A, %l.D |
| br i1 %cmp.b, label %loop.end, label %loop.latch |
| |
| ; Counted exit: leaves the loop once %iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| ; A distinct constant per exiting block identifies which exit was taken. |
| loop.end: |
| %retval = phi i64 [ 1, %block.a1 ], [ 2, %block.a2 ], [ 3, %block.b ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Three early exits in sequence, all reachable only when %l.A < 0. block.a, |
| ; block.b and block.c compare %l.A with the elements of @B, @C and @D in turn; |
| ; a match exits with 1, 2 or 3 respectively, a mismatch falls through to the |
| ; next block (or to the latch after block.c). When %l.A >= 0 the header goes |
| ; straight to the latch. The counted exit from the latch returns 0. |
| define i64 @chain_of_3_exits() { |
| ; CHECK-LABEL: define i64 @chain_of_3_exits() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[BLOCK_A:.*]] |
| ; CHECK: [[BLOCK_A]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_C:.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP8]], align 1 |
| ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP7]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> splat (i1 true), <4 x i1> [[TMP10]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = freeze <4 x i1> [[TMP12]] |
| ; CHECK-NEXT: [[CMP_B:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]]) |
| ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[BLOCK_C]] |
| ; CHECK: [[BLOCK_C]]: |
| ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP5:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP12]], i1 false) |
| ; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP16]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[LOOP_LATCH:.*]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP7]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP17]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_2]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_1]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_0]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[VECTOR_EARLY_EXIT_2]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| ; The false edge bypasses the whole chain of exiting blocks. |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %cond.a = icmp slt i8 %l.A, 0 |
| br i1 %cond.a, label %block.a, label %loop.latch |
| |
| ; First link of the chain. |
| block.a: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %cmp.a = icmp eq i8 %l.A, %l.B |
| br i1 %cmp.a, label %loop.end, label %block.b |
| |
| ; Second link, reached only when block.a did not exit. |
| block.b: |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %cmp.b = icmp eq i8 %l.A, %l.C |
| br i1 %cmp.b, label %loop.end, label %block.c |
| |
| ; Third link, reached only when neither block.a nor block.b exited. |
| block.c: |
| %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv |
| %l.D = load i8, ptr %gep.D, align 1 |
| %cmp.c = icmp eq i8 %l.A, %l.D |
| br i1 %cmp.c, label %loop.end, label %loop.latch |
| |
| ; Counted exit: leaves the loop once %iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| ; A distinct constant per exiting block identifies which exit was taken. |
| loop.end: |
| %retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Two consecutive diamonds, each arm with its own early exit (four in total). |
| ; The first diamond branches on %l.A < 0: branch1.a exits with 1 when %l.A |
| ; equals the @B element, branch1.b exits with 2 when %l.A equals the @C |
| ; element; both otherwise continue to branch2. The second diamond branches on |
| ; %l.D < 0: branch2.a exits with 3 when %l.A == %l.D, branch2.b exits with 4 |
| ; when %l.A != %l.D. The counted exit from the latch returns 0. |
| define i64 @four_exits_2x2_diamond() { |
| ; CHECK-LABEL: define i64 @four_exits_2x2_diamond() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[BRANCH1_A:.*]] |
| ; CHECK: [[BRANCH1_A]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BRANCH2:.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 |
| ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP9]], align 1 |
| ; CHECK-NEXT: [[TMP10:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD3]], zeroinitializer |
| ; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i1> [[TMP12]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD3]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP14]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]] |
| ; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP13]] |
| ; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> splat (i1 true), <4 x i1> [[TMP15]] |
| ; CHECK-NEXT: [[TMP19:%.*]] = freeze <4 x i1> [[TMP18]] |
| ; CHECK-NEXT: [[CMP1A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP19]]) |
| ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[CMP1A]], label %[[LOOP_END:.*]], label %[[BRANCH2]] |
| ; CHECK: [[BRANCH2]]: |
| ; CHECK-NEXT: br i1 [[TMP21]], label %[[BRANCH2_A:.*]], label %[[BRANCH1_A]], !llvm.loop [[LOOP6:![0-9]+]] |
| ; CHECK: [[BRANCH2_A]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP18]], i1 false) |
| ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP5]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP22]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[BRANCH2_B:.*]] |
| ; CHECK: [[BRANCH2_B]]: |
| ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP23]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[LOOP_LATCH:.*]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP13]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP24]], label %[[VECTOR_EARLY_EXIT_2:.*]], label %[[VECTOR_EARLY_EXIT_3:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_3]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_2]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_1]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_0]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 4, %[[VECTOR_EARLY_EXIT_2]] ], [ 3, %[[VECTOR_EARLY_EXIT_3]] ], [ 2, %[[VECTOR_EARLY_EXIT_0]] ], [ 1, %[[VECTOR_EARLY_EXIT_1]] ], [ 0, %[[BRANCH2_A]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %cond1 = icmp slt i8 %l.A, 0 |
| br i1 %cond1, label %branch1.a, label %branch1.b |
| |
| ; First diamond, true arm. |
| branch1.a: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %cmp1a = icmp eq i8 %l.A, %l.B |
| br i1 %cmp1a, label %loop.end, label %branch2 |
| |
| ; First diamond, false arm. |
| branch1.b: |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %cmp1b = icmp eq i8 %l.A, %l.C |
| br i1 %cmp1b, label %loop.end, label %branch2 |
| |
| ; Merge of the first diamond and head of the second; the second diamond is |
| ; selected by the sign of the @D element rather than of %l.A. |
| branch2: |
| %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv |
| %l.D = load i8, ptr %gep.D, align 1 |
| %cond2 = icmp slt i8 %l.D, 0 |
| br i1 %cond2, label %branch2.a, label %branch2.b |
| |
| ; Second diamond, true arm: exits on equality. |
| branch2.a: |
| %cmp2a = icmp eq i8 %l.A, %l.D |
| br i1 %cmp2a, label %loop.end, label %loop.latch |
| |
| ; Second diamond, false arm: exits on inequality (note icmp ne). |
| branch2.b: |
| %cmp2b = icmp ne i8 %l.A, %l.D |
| br i1 %cmp2b, label %loop.end, label %loop.latch |
| |
| ; Counted exit: leaves the loop once %iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| ; A distinct constant per exiting block identifies which exit was taken. |
| loop.end: |
| %retval = phi i64 [ 1, %branch1.a ], [ 2, %branch1.b ], [ 3, %branch2.a ], [ 4, %branch2.b ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Diamond where neither branch exits directly. Both sides of the diamond |
| ; merge to a block that then has an exit with a phi-dependent live-out value. |
| ; The diamond is selected by %l.A < 0: the left side loads from @B, the right |
| ; side loads from @C. The merge block exits when %l.A equals the loaded |
| ; element and returns that element zero-extended to i64; the counted exit |
| ; from the latch returns 0. |
| define i64 @diamond_merge_then_exit_with_phi_liveout() { |
| ; CHECK-LABEL: define i64 @diamond_merge_then_exit_with_phi_liveout() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[INDEX]] |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 |
| ; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[WIDE_LOAD2]], <4 x i8> [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[PREDPHI]] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]] |
| ; CHECK: [[VECTOR_BODY_INTERIM]]: |
| ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_END:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT]]: |
| ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP9:%.*]] = zext <4 x i8> [[WIDE_LOAD2]] to <4 x i64> |
| ; CHECK-NEXT: [[PREDPHI3:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[TMP9]], <4 x i64> [[TMP8]] |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) |
| ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[PREDPHI3]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br label %[[LOOP_END]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP10]], %[[VECTOR_EARLY_EXIT]] ], [ 0, %[[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %cond = icmp slt i8 %l.A, 0 |
| br i1 %cond, label %left, label %right |
| |
| ; Taken when %l.A is negative. |
| left: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %val.left = zext i8 %l.B to i64 |
| br label %merge |
| |
| ; Taken when %l.A is non-negative. |
| right: |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %val.right = zext i8 %l.C to i64 |
| br label %merge |
| |
| ; The only early-exiting block. Both the exit condition (%ld.for.cmp) and the |
| ; live-out (%val) depend on phis of values defined in the diamond arms. |
| merge: |
| %val = phi i64 [ %val.left, %left ], [ %val.right, %right ] |
| %ld.for.cmp = phi i8 [ %l.B, %left ], [ %l.C, %right ] |
| %cmp = icmp eq i8 %l.A, %ld.for.cmp |
| br i1 %cmp, label %loop.end, label %loop.latch |
| |
| ; Counted exit: leaves the loop once %iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| loop.end: |
| %retval = phi i64 [ %val, %merge ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Diamond where both exit conditions compare l.A against l.B and l.C |
| ; respectively. If l.B == l.C at runtime, both conditions could be true |
| ; for the same lane, but the masking with cond/NOT cond prevents both |
| ; from firing simultaneously. Tests that the predication correctly |
| ; disambiguates the exits. |
| ; All three loads are performed unconditionally in the header, and both early |
| ; exits return %iv; the counted exit from the latch returns 0. |
| define i64 @diamond_exits_overlapping_conditions() { |
| ; CHECK-LABEL: define i64 @diamond_exits_overlapping_conditions() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[BLOCK_B:.*]] ] |
| ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1 |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 |
| ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> splat (i1 true), <4 x i1> [[TMP8]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP9]] |
| ; CHECK-NEXT: [[COND:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) |
| ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[COND]], label %[[BLOCK_A:.*]], label %[[BLOCK_B]] |
| ; CHECK: [[BLOCK_B]]: |
| ; CHECK-NEXT: br i1 [[TMP12]], label %[[LOOP_LATCH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[BLOCK_A]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP9]], i1 false) |
| ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP13]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[LOOP_END:.*]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_0]]: |
| ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[IV]], [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP15]], %[[VECTOR_EARLY_EXIT_0]] ], [ [[TMP14]], %[[LOOP_END]] ], [ 0, %[[LOOP_LATCH]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| ; Unlike the other diamonds in this file, @B and @C are loaded here rather |
| ; than inside the diamond arms. |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %cond = icmp slt i8 %l.A, 0 |
| br i1 %cond, label %block.a, label %block.b |
| |
| ; Taken when %l.A is negative. |
| block.a: |
| %cmp.a = icmp eq i8 %l.A, %l.B |
| br i1 %cmp.a, label %loop.end, label %loop.latch |
| |
| ; Taken when %l.A is non-negative. |
| block.b: |
| %cmp.b = icmp eq i8 %l.A, %l.C |
| br i1 %cmp.b, label %loop.end, label %loop.latch |
| |
| ; Counted exit: leaves the loop once %iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| ; Both early exits feed the same value (%iv) into the phi. |
| loop.end: |
| %retval = phi i64 [ %iv, %block.a ], [ %iv, %block.b ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Block C is reachable from both an exit-fallthrough path and a direct branch. |
| ; block.a has an exit; if it doesn't exit, it falls through to block.c. |
| ; The header's false branch goes directly to block.c. |
| ; block.c then has its own exit. |
| define i64 @exit_from_merge_of_exit_fallthrough_and_bypass() { |
| ; CHECK-LABEL: define i64 @exit_from_merge_of_exit_fallthrough_and_bypass() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[BLOCK_A:.*]] |
| ; CHECK: [[BLOCK_A]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[GEP_C:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[GEP_C]], align 1 |
| ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 |
| ; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> splat (i1 true), <4 x i1> [[TMP6]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = freeze <4 x i1> [[TMP7]] |
| ; CHECK-NEXT: [[CMP_C:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP8]]) |
| ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[CMP_C]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP9:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP7]], i1 false) |
| ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP11]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_1]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_0]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_0]] ], [ 0, %[[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| ; Iterations with a negative (signed) A[iv] visit block.a first; all other |
| ; iterations bypass it and branch directly to block.c. |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %cond = icmp slt i8 %l.A, 0 |
| br i1 %cond, label %block.a, label %block.c |
| |
| block.a: |
| ; First early exit: leave the loop when A[iv] == B[iv] (result 1). |
| ; Otherwise fall through to block.c, which is also reached from the header. |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %cmp.a = icmp eq i8 %l.A, %l.B |
| br i1 %cmp.a, label %loop.end, label %block.c |
| |
| block.c: |
| ; Second early exit: leave the loop when A[iv] == C[iv] (result 2). |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %cmp.c = icmp eq i8 %l.A, %l.C |
| br i1 %cmp.c, label %loop.end, label %loop.latch |
| |
| loop.latch: |
| ; Counted exit: the loop ends once iv.next reaches 64 (result 0). |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| loop.end: |
| ; The returned constant identifies which exit was taken: |
| ; 1 = block.a, 2 = block.c, 0 = latch. |
| %retval = phi i64 [ 1, %block.a ], [ 2, %block.c ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; When the else branch is speculatively executed for iv < 2, `sub nuw` wraps, |
| ; producing poison. The exit condition derived from that poison value |
| ; (trap.cond) is processed first in RPO. |
| ; Test for https://github.com/llvm/llvm-project/issues/187061. |
| define i32 @diamond_exit_poison_from_speculated_branch() { |
| ; CHECK-LABEL: define i32 @diamond_exit_poison_from_speculated_branch() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> splat (i1 true) |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]]) |
| ; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_END:.*]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> splat (i1 true), i1 false) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_1]]: |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 1, i32 2>, i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_0]]: |
| ; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]] |
| ; CHECK: [[UNREACHABLE_EXIT]]: |
| ; CHECK-NEXT: call void @llvm.trap() |
| ; CHECK-NEXT: unreachable |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ] |
| ; CHECK-NEXT: ret i32 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| ; iv < 2 selects the then branch; iv >= 2 selects the else branch. |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %cmp = icmp ult i32 %iv, 2 |
| br i1 %cmp, label %then, label %else |
| |
| then: |
| ; Yields 10 and 11 for iv = 0 and 1. |
| %lo.val = add i32 %iv, 10 |
| br label %loop.exiting |
| |
| else: |
| ; Reached in the scalar loop only for iv >= 2, where the nuw subtraction |
| ; does not wrap. For iv = 2 and 3, %shl is 1 and 2, so %trap.cond is false |
| ; and the trap exit is not taken. For iv < 2 the subtraction would wrap, |
| ; making %sub (and everything computed from it) poison. |
| %sub = sub nuw i32 %iv, 2 |
| %shl = shl nuw i32 1, %sub |
| %trap.cond = icmp eq i32 %shl, 999 |
| br i1 %trap.cond, label %unreachable.exit, label %loop.exiting |
| |
| unreachable.exit: |
| call void @llvm.trap() |
| unreachable |
| |
| loop.exiting: |
| ; Merge of both diamond arms; early exit with %val when %val < 12. |
| %val = phi i32 [ %lo.val, %then ], [ %shl, %else ] |
| %found.cond = icmp ult i32 %val, 12 |
| br i1 %found.cond, label %loop.end, label %loop.latch |
| |
| loop.latch: |
| ; Counted exit once iv.next reaches 4 (result -1). |
| %iv.next = add nuw nsw i32 %iv, 1 |
| %done = icmp eq i32 %iv.next, 4 |
| br i1 %done, label %loop.end, label %loop.header |
| |
| loop.end: |
| %retval = phi i32 [ %val, %loop.exiting ], [ -1, %loop.latch ] |
| ret i32 %retval |
| } |
| |
| ; Same as above but the poison exit condition (trap.cond from the speculated |
| ; else branch) comes second in RPO. The first processed exit is from |
| ; the then branch (RPO-before else). |
| define i32 @diamond_exit_poison_cond_second() { |
| ; CHECK-LABEL: define i32 @diamond_exit_poison_cond_second() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i1> <i1 true, i1 false, i1 false, i1 false> |
| ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP0]]) |
| ; CHECK-NEXT: br i1 [[TMP1]], label %[[VECTOR_EARLY_EXIT_CHECK:.*]], label %[[MIDDLE_BLOCK:.*]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_END:.*]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_CHECK]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> <i1 true, i1 false, i1 false, i1 false>, i1 false) |
| ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> zeroinitializer, i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT_0:.*]], label %[[VECTOR_EARLY_EXIT_1:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_1]]: |
| ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> <i32 10, i32 11, i32 12, i32 13>, i64 [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_0]]: |
| ; CHECK-NEXT: br label %[[UNREACHABLE_EXIT:.*]] |
| ; CHECK: [[UNREACHABLE_EXIT]]: |
| ; CHECK-NEXT: call void @llvm.trap() |
| ; CHECK-NEXT: unreachable |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[TMP3]], %[[VECTOR_EARLY_EXIT_1]] ], [ -1, %[[LOOP_END]] ] |
| ; CHECK-NEXT: ret i32 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| ; iv < 2 selects the then branch; iv >= 2 selects the else branch. |
| %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %cmp = icmp ult i32 %iv, 2 |
| br i1 %cmp, label %then, label %else |
| |
| then: |
| ; Early exit with %val when %val < 11. Of the iv values that reach this |
| ; block (0 and 1), only iv = 0 (val = 10) satisfies the condition. |
| %val = add i32 %iv, 10 |
| %found.cond = icmp ult i32 %val, 11 |
| br i1 %found.cond, label %loop.end, label %loop.latch |
| |
| else: |
| ; sub nuw produces poison when speculatively executed for iv < 2. |
| ; For iv = 2 and 3, %shl is 1 and 2, so %trap.cond is false and the trap |
| ; exit is not taken. |
| %sub = sub nuw i32 %iv, 2 |
| %shl = shl nuw i32 1, %sub |
| %trap.cond = icmp eq i32 %shl, 999 |
| br i1 %trap.cond, label %unreachable.exit, label %loop.latch |
| |
| unreachable.exit: |
| call void @llvm.trap() |
| unreachable |
| |
| loop.latch: |
| ; Counted exit once iv.next reaches 4 (result -1). |
| %iv.next = add nuw nsw i32 %iv, 1 |
| %done = icmp eq i32 %iv.next, 4 |
| br i1 %done, label %loop.end, label %loop.header |
| |
| loop.end: |
| %retval = phi i32 [ %val, %then ], [ -1, %loop.latch ] |
| ret i32 %retval |
| } |
| |