| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 |
| ; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s |
| ; |
| ; Input arrays for the tests below. Each is 64 bytes and zero-initialized; |
| ; the test loops stop once iv.next reaches 64, so the i8 loads indexed by |
| ; the induction variable stay within these arrays. |
| @A = global [64 x i8] zeroinitializer |
| @B = global [64 x i8] zeroinitializer |
| @C = global [64 x i8] zeroinitializer |
| @D = global [64 x i8] zeroinitializer |
| |
| ; One early exit that lives in a conditionally executed block. |
| ; block.a is entered only when A[iv] is negative and leaves the loop when |
| ; A[iv] == B[iv]. The function returns the iv of the first such iteration, |
| ; or -1 when the latch ends the loop after 64 iterations. |
| ; With VF=4 and interleave count 2 (see the command line at the top), the |
| ; expected output folds the guarding branch condition into the exit mask |
| ; with a select and recovers the exiting index from the first active lane |
| ; via llvm.experimental.cttz.elts. |
| define i64 @single_exit_in_conditional_block() { |
| ; CHECK-LABEL: define i64 @single_exit_in_conditional_block() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[BLOCK_A:.*]] |
| ; CHECK: [[BLOCK_A]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 |
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD2]], zeroinitializer |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[GEP_B]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[GEP_B]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD2]], [[WIDE_LOAD3]] |
| ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP13]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 |
| ; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = freeze <4 x i1> [[TMP14]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP5]], [[TMP11]] |
| ; CHECK-NEXT: [[CMP:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]]) |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[BLOCK_A]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP14]], i1 false) |
| ; CHECK-NEXT: [[TMP16:%.*]] = add i64 4, [[TMP15]] |
| ; CHECK-NEXT: [[TMP17:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP4]], i1 false) |
| ; CHECK-NEXT: [[TMP18:%.*]] = add i64 0, [[TMP17]] |
| ; CHECK-NEXT: [[TMP19:%.*]] = icmp ne i64 [[TMP17]], 4 |
| ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP19]], i64 [[TMP18]], i64 [[TMP16]] |
| ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[IV]], [[TMP8]] |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP9]], %[[LOOP_END]] ], [ -1, %[[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| ; Header: load A[iv] and only visit block.a when the value is negative. |
| loop.header: |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %branch.cond = icmp slt i8 %l.A, 0 |
| br i1 %branch.cond, label %block.a, label %loop.latch |
| |
| ; Conditionally executed block holding the only early exit. |
| block.a: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %cmp = icmp eq i8 %l.A, %l.B |
| br i1 %cmp, label %loop.end, label %loop.latch |
| |
| ; Latch: counted exit once iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %ec = icmp ne i64 %iv.next, 64 |
| br i1 %ec, label %loop.header, label %loop.end |
| |
| ; Early exit yields the current iv; the counted exit yields -1. |
| loop.end: |
| %retval = phi i64 [ %iv, %block.a ], [ -1, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Diamond control flow that rejoins before the early exit. |
| ; %val is zext(B[iv]) when A[iv] is negative and zext(C[iv]) otherwise; the |
| ; loop exits early from %join when %val == 42 and returns %val, or returns 0 |
| ; when the latch ends the loop after 64 iterations. |
| ; In the expected output the %val phi becomes a vector select, and the |
| ; returned value is extracted from the interleaved part that contains the |
| ; first active lane of the exit mask. |
| define i64 @diamond_with_join_then_exit() { |
| ; CHECK-LABEL: define i64 @diamond_with_join_then_exit() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 |
| ; CHECK-NEXT: [[TMP14:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD4]], zeroinitializer |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD1]], zeroinitializer |
| ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP2]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP5]], align 1 |
| ; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i8> [[WIDE_LOAD3]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[WIDE_LOAD6]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[INDEX]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP4]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP10]], align 1 |
| ; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[WIDE_LOAD5]] to <4 x i64> |
| ; CHECK-NEXT: [[TMP12:%.*]] = zext <4 x i8> [[WIDE_LOAD7]] to <4 x i64> |
| ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <4 x i1> [[TMP14]], <4 x i64> [[TMP11]], <4 x i64> [[TMP18]] |
| ; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[TMP12]], <4 x i64> [[TMP7]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i64> [[PREDPHI6]], splat (i64 42) |
| ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq <4 x i64> [[PREDPHI7]], splat (i64 42) |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 |
| ; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP13]] |
| ; CHECK-NEXT: [[TMP22:%.*]] = freeze <4 x i1> [[TMP17]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP15]], [[TMP22]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP16]]) |
| ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[TMP8]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]] |
| ; CHECK: [[VECTOR_BODY_INTERIM]]: |
| ; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: br label %[[LOOP_END:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP17]], i1 false) |
| ; CHECK-NEXT: [[TMP19:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE7:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP13]], i1 false) |
| ; CHECK-NEXT: [[TMP20:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE7]] |
| ; CHECK-NEXT: [[TMP21:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE7]], 4 |
| ; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP21]], i64 [[TMP20]], i64 [[TMP19]] |
| ; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i64> [[PREDPHI6]], i64 [[TMP24]] |
| ; CHECK-NEXT: [[TMP28:%.*]] = sub i64 [[TMP24]], 4 |
| ; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i64 [[TMP28]] |
| ; CHECK-NEXT: [[TMP26:%.*]] = icmp uge i64 [[TMP24]], 4 |
| ; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i64 [[TMP29]], i64 [[TMP25]] |
| ; CHECK-NEXT: br label %[[LOOP_END]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[TMP27]], %[[VECTOR_EARLY_EXIT]] ], [ 0, %[[MIDDLE_BLOCK]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| ; Header: the sign of A[iv] selects which arm of the diamond runs. |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %branch.cond = icmp slt i8 %l.A, 0 |
| br i1 %branch.cond, label %block.a, label %block.b |
| |
| ; Arm taken for negative A[iv]: value comes from B. |
| block.a: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %val.a = zext i8 %l.B to i64 |
| br label %join |
| |
| ; Arm taken for non-negative A[iv]: value comes from C. |
| block.b: |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %val.b = zext i8 %l.C to i64 |
| br label %join |
| |
| ; Join point: the early exit tests the merged value against 42. |
| join: |
| %val = phi i64 [ %val.a, %block.a ], [ %val.b, %block.b ] |
| %cmp = icmp eq i64 %val, 42 |
| br i1 %cmp, label %loop.end, label %loop.latch |
| |
| ; Latch: counted exit once iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %ec = icmp ne i64 %iv.next, 64 |
| br i1 %ec, label %loop.header, label %loop.end |
| |
| ; Early exit yields the merged value; the counted exit yields 0. |
| loop.end: |
| %retval = phi i64 [ %val, %join ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| ; Three early exits, each in its own conditionally executed block selected |
| ; by the value of A[iv]: |
| ;   A[iv] < -42        -> block.a, exits when A[iv] == B[iv], returns 1 |
| ;   -42 <= A[iv] < 42  -> block.b, exits when A[iv] == C[iv], returns 2 |
| ;   A[iv] >= 42        -> block.c, exits when A[iv] == D[iv], returns 3 |
| ; Returns 0 when the latch ends the loop after 64 iterations. |
| ; In the expected output the three masked exit conditions are combined into |
| ; one mask for the loop's early-exit branch; the exit blocks then extract |
| ; the per-exit masks at the first active lane to pick which constant |
| ; reaches the return phi. |
| define i64 @three_early_exits() { |
| ; CHECK-LABEL: define i64 @three_early_exits() { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP_HEADER]] ], [ [[INDEX_NEXT:%.*]], %[[CHECK_B:.*]] ] |
| ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr @A, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[GEP_A]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[GEP_A]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1 |
| ; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 -42) |
| ; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD1]], splat (i8 -42) |
| ; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP6:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD]], splat (i8 42) |
| ; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i8> [[WIDE_LOAD1]], splat (i8 42) |
| ; CHECK-NEXT: [[TMP8:%.*]] = xor <4 x i1> [[TMP6]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP7]], splat (i1 true) |
| ; CHECK-NEXT: [[TMP10:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP8]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP9]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr @D, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[TMP12]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i8>, ptr [[TMP13]], align 1 |
| ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]] |
| ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD3]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = select <4 x i1> [[TMP14]], <4 x i1> [[TMP10]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP17:%.*]] = select <4 x i1> [[TMP15]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP18:%.*]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP6]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP19:%.*]] = select <4 x i1> [[TMP5]], <4 x i1> [[TMP7]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, ptr @C, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP20]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i8>, ptr [[TMP20]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i8>, ptr [[TMP21]], align 1 |
| ; CHECK-NEXT: [[TMP22:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD4]] |
| ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD5]] |
| ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP22]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP25:%.*]] = select <4 x i1> [[TMP23]], <4 x i1> [[TMP19]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr @B, i64 [[IV]] |
| ; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i8, ptr [[TMP26]], i64 4 |
| ; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i8>, ptr [[TMP26]], align 1 |
| ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP27]], align 1 |
| ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD6]] |
| ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD7]] |
| ; CHECK-NEXT: [[TMP30:%.*]] = select <4 x i1> [[TMP28]], <4 x i1> [[TMP2]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[TMP31:%.*]] = select <4 x i1> [[TMP29]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 8 |
| ; CHECK-NEXT: [[TMP32:%.*]] = select <4 x i1> [[TMP16]], <4 x i1> splat (i1 true), <4 x i1> [[TMP24]] |
| ; CHECK-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP17]], <4 x i1> splat (i1 true), <4 x i1> [[TMP25]] |
| ; CHECK-NEXT: [[TMP34:%.*]] = select <4 x i1> [[TMP32]], <4 x i1> splat (i1 true), <4 x i1> [[TMP30]] |
| ; CHECK-NEXT: [[TMP35:%.*]] = select <4 x i1> [[TMP33]], <4 x i1> splat (i1 true), <4 x i1> [[TMP31]] |
| ; CHECK-NEXT: [[TMP36:%.*]] = freeze <4 x i1> [[TMP34]] |
| ; CHECK-NEXT: [[TMP37:%.*]] = freeze <4 x i1> [[TMP35]] |
| ; CHECK-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP36]], [[TMP37]] |
| ; CHECK-NEXT: [[COND_A:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP38]]) |
| ; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 |
| ; CHECK-NEXT: br i1 [[COND_A]], label %[[BLOCK_A:.*]], label %[[CHECK_B]] |
| ; CHECK: [[CHECK_B]]: |
| ; CHECK-NEXT: br i1 [[TMP40]], label %[[BLOCK_B:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; CHECK: [[BLOCK_B]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1:.*]] |
| ; CHECK: [[BLOCK_A]]: |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP35]], i1 false) |
| ; CHECK-NEXT: [[TMP41:%.*]] = add i64 4, [[FIRST_ACTIVE_LANE]] |
| ; CHECK-NEXT: [[FIRST_ACTIVE_LANE8:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP34]], i1 false) |
| ; CHECK-NEXT: [[TMP42:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE8]] |
| ; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i64 [[FIRST_ACTIVE_LANE8]], 4 |
| ; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i64 [[TMP42]], i64 [[TMP41]] |
| ; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i1> [[TMP16]], i64 [[TMP44]] |
| ; CHECK-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], 4 |
| ; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[TMP17]], i64 [[TMP46]] |
| ; CHECK-NEXT: [[TMP48:%.*]] = icmp uge i64 [[TMP44]], 4 |
| ; CHECK-NEXT: [[CMP_B:%.*]] = select i1 [[TMP48]], i1 [[TMP47]], i1 [[TMP45]] |
| ; CHECK-NEXT: br i1 [[CMP_B]], label %[[LOOP_END:.*]], label %[[LOOP_LATCH:.*]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[TMP50:%.*]] = extractelement <4 x i1> [[TMP24]], i64 [[TMP44]] |
| ; CHECK-NEXT: [[TMP51:%.*]] = sub i64 [[TMP44]], 4 |
| ; CHECK-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP25]], i64 [[TMP51]] |
| ; CHECK-NEXT: [[TMP53:%.*]] = icmp uge i64 [[TMP44]], 4 |
| ; CHECK-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i1 [[TMP52]], i1 [[TMP50]] |
| ; CHECK-NEXT: br i1 [[TMP54]], label %[[VECTOR_EARLY_EXIT_1:.*]], label %[[VECTOR_EARLY_EXIT_2:.*]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_2]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[VECTOR_EARLY_EXIT_1]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END]]: |
| ; CHECK-NEXT: br label %[[LOOP_END1]] |
| ; CHECK: [[LOOP_END1]]: |
| ; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 3, %[[LOOP_END]] ], [ 2, %[[VECTOR_EARLY_EXIT_1]] ], [ 1, %[[VECTOR_EARLY_EXIT_2]] ], [ 0, %[[BLOCK_B]] ] |
| ; CHECK-NEXT: ret i64 [[RETVAL]] |
| ; |
| entry: |
| br label %loop.header |
| |
| ; Header: first range test, A[iv] < -42 goes to block.a. |
| loop.header: |
| %iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ] |
| %gep.A = getelementptr inbounds i8, ptr @A, i64 %iv |
| %l.A = load i8, ptr %gep.A, align 1 |
| %cond.a = icmp slt i8 %l.A, -42 |
| br i1 %cond.a, label %block.a, label %check.b |
| |
| ; Second range test: A[iv] < 42 goes to block.b, everything else to block.c. |
| check.b: |
| %cond.b = icmp slt i8 %l.A, 42 |
| br i1 %cond.b, label %block.b, label %block.c |
| |
| ; Early exit 1: compares A[iv] with B[iv]. |
| block.a: |
| %gep.B = getelementptr inbounds i8, ptr @B, i64 %iv |
| %l.B = load i8, ptr %gep.B, align 1 |
| %cmp.a = icmp eq i8 %l.A, %l.B |
| br i1 %cmp.a, label %loop.end, label %loop.latch |
| |
| ; Early exit 2: compares A[iv] with C[iv]. |
| block.b: |
| %gep.C = getelementptr inbounds i8, ptr @C, i64 %iv |
| %l.C = load i8, ptr %gep.C, align 1 |
| %cmp.b = icmp eq i8 %l.A, %l.C |
| br i1 %cmp.b, label %loop.end, label %loop.latch |
| |
| ; Early exit 3: compares A[iv] with D[iv]. |
| block.c: |
| %gep.D = getelementptr inbounds i8, ptr @D, i64 %iv |
| %l.D = load i8, ptr %gep.D, align 1 |
| %cmp.c = icmp eq i8 %l.A, %l.D |
| br i1 %cmp.c, label %loop.end, label %loop.latch |
| |
| ; Latch: counted exit once iv.next reaches 64. |
| loop.latch: |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 64 |
| br i1 %exitcond, label %loop.header, label %loop.end |
| |
| ; The returned constant identifies which exit was taken (0 = counted exit). |
| loop.end: |
| %retval = phi i64 [ 1, %block.a ], [ 2, %block.b ], [ 3, %block.c ], [ 0, %loop.latch ] |
| ret i64 %retval |
| } |
| |
| |