| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 |
| ; RUN: opt -p loop-vectorize -S %s | FileCheck %s |
| ; RUN: opt -p loop-vectorize -vectorizer-maximize-bandwidth -S %s | FileCheck --check-prefix=MAX-BW %s |
| |
| target triple = "arm64-apple-macosx" |
| |
| ; Early-exit loop that compares two byte buffers element by element. |
| ; Neither %p1 nor %p2 carries a dereferenceable attribute. |
| ; The autogenerated assertions below are identical for both opt invocations |
| ; and expect the loop to be left in its original scalar form. |
| ; NOTE(review): presumably the loop stays scalar because the loads cannot be |
| ; proven safe to execute past the early exit - confirm against the |
| ; LoopVectorize legality checks. |
| define i64 @early_exit_with_without_dereferenceable(ptr %p1, ptr %p2) { |
| ; CHECK-LABEL: define i64 @early_exit_with_without_dereferenceable( |
| ; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; CHECK-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] |
| ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 |
| ; CHECK-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] |
| ; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_P2]], align 1 |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[LD1]], [[LD2]] |
| ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 |
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 100 |
| ; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] |
| ; CHECK-NEXT: ret i64 [[IV_LCSSA]] |
| ; |
| ; MAX-BW-LABEL: define i64 @early_exit_with_without_dereferenceable( |
| ; MAX-BW-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { |
| ; MAX-BW-NEXT: [[ENTRY:.*]]: |
| ; MAX-BW-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; MAX-BW: [[LOOP_HEADER]]: |
| ; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; MAX-BW-NEXT: [[GEP_P1:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[IV]] |
| ; MAX-BW-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_P1]], align 1 |
| ; MAX-BW-NEXT: [[GEP_P2:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[IV]] |
| ; MAX-BW-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_P2]], align 1 |
| ; MAX-BW-NEXT: [[CMP:%.*]] = icmp eq i8 [[LD1]], [[LD2]] |
| ; MAX-BW-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] |
| ; MAX-BW: [[LOOP_LATCH]]: |
| ; MAX-BW-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 |
| ; MAX-BW-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], 100 |
| ; MAX-BW-NEXT: br i1 [[EXITCOND]], label %[[LOOP_HEADER]], label %[[EXIT]] |
| ; MAX-BW: [[EXIT]]: |
| ; MAX-BW-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] |
| ; MAX-BW-NEXT: ret i64 [[IV_LCSSA]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| ; Load one byte from each buffer at index %iv; the first mismatch takes the |
| ; early exit straight out of the header. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %gep.p1 = getelementptr inbounds i8, ptr %p1, i64 %iv |
| %ld1 = load i8, ptr %gep.p1, align 1 |
| %gep.p2 = getelementptr inbounds i8, ptr %p2, i64 %iv |
| %ld2 = load i8, ptr %gep.p2, align 1 |
| %cmp = icmp eq i8 %ld1, %ld2 |
| br i1 %cmp, label %loop.latch, label %exit |
| |
| loop.latch: |
| ; The bytes matched: advance the index and leave once it reaches 100. |
| %iv.next = add i64 %iv, 1 |
| %exitcond = icmp ne i64 %iv.next, 100 |
| br i1 %exitcond, label %loop.header, label %exit |
| |
| exit: |
| ; %iv is the index of the first mismatch, or 99 when the latch exit was taken. |
| ret i64 %iv |
| } |
| |
| ; Early-exit loop whose two loads have different element widths: an i8 load |
| ; from %A and an i32 load from %B (truncated to i8 before the compare). |
| ; Neither pointer argument carries a dereferenceable attribute. |
| ; The autogenerated assertions below are identical for both opt invocations |
| ; and expect the loop to be left in its original scalar form. |
| ; NOTE(review): presumably the loop stays scalar because the loads cannot be |
| ; proven safe to execute past the early exit - confirm against the |
| ; LoopVectorize legality checks. |
| define i64 @early_exit_mixed_width_no_dereferencable(ptr %A, ptr %B) { |
| ; CHECK-LABEL: define i64 @early_exit_mixed_width_no_dereferencable( |
| ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*]]: |
| ; CHECK-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; CHECK: [[LOOP_HEADER]]: |
| ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] |
| ; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A]], align 1 |
| ; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] |
| ; CHECK-NEXT: [[LD2:%.*]] = load i32, ptr [[GEP_B]], align 4 |
| ; CHECK-NEXT: [[LD2_TRUNC:%.*]] = trunc i32 [[LD2]] to i8 |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[LD1]], [[LD2_TRUNC]] |
| ; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] |
| ; CHECK: [[LOOP_LATCH]]: |
| ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 |
| ; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 |
| ; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]] |
| ; CHECK: [[EXIT]]: |
| ; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] |
| ; CHECK-NEXT: ret i64 [[IV_LCSSA]] |
| ; |
| ; MAX-BW-LABEL: define i64 @early_exit_mixed_width_no_dereferencable( |
| ; MAX-BW-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { |
| ; MAX-BW-NEXT: [[ENTRY:.*]]: |
| ; MAX-BW-NEXT: br label %[[LOOP_HEADER:.*]] |
| ; MAX-BW: [[LOOP_HEADER]]: |
| ; MAX-BW-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ] |
| ; MAX-BW-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]] |
| ; MAX-BW-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A]], align 1 |
| ; MAX-BW-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] |
| ; MAX-BW-NEXT: [[LD2:%.*]] = load i32, ptr [[GEP_B]], align 4 |
| ; MAX-BW-NEXT: [[LD2_TRUNC:%.*]] = trunc i32 [[LD2]] to i8 |
| ; MAX-BW-NEXT: [[CMP:%.*]] = icmp eq i8 [[LD1]], [[LD2_TRUNC]] |
| ; MAX-BW-NEXT: br i1 [[CMP]], label %[[LOOP_LATCH]], label %[[EXIT:.*]] |
| ; MAX-BW: [[LOOP_LATCH]]: |
| ; MAX-BW-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 |
| ; MAX-BW-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 1024 |
| ; MAX-BW-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]] |
| ; MAX-BW: [[EXIT]]: |
| ; MAX-BW-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP_LATCH]] ], [ [[IV]], %[[LOOP_HEADER]] ] |
| ; MAX-BW-NEXT: ret i64 [[IV_LCSSA]] |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| ; %A is indexed in i8 elements and %B in i32 elements, both by the same %iv. |
| ; Only the low 8 bits of the i32 value take part in the comparison; a |
| ; mismatch takes the early exit straight out of the header. |
| %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] |
| %gep.A = getelementptr inbounds i8, ptr %A, i64 %iv |
| %ld1 = load i8, ptr %gep.A, align 1 |
| %gep.B = getelementptr inbounds i32, ptr %B, i64 %iv |
| %ld2 = load i32, ptr %gep.B, align 4 |
| %ld2.trunc = trunc i32 %ld2 to i8 |
| %cmp = icmp eq i8 %ld1, %ld2.trunc |
| br i1 %cmp, label %loop.latch, label %exit |
| |
| loop.latch: |
| ; The values matched: advance the index and leave once it reaches 1024. |
| %iv.next = add i64 %iv, 1 |
| %ec = icmp ne i64 %iv.next, 1024 |
| br i1 %ec, label %loop.header, label %exit |
| |
| exit: |
| ; %iv is the index of the first mismatch, or 1023 when the latch exit was taken. |
| ret i64 %iv |
| } |