blob: 6fb18c9315f27dad9290297415c09b83f28efe5b [file]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s
; Test loads that are only used as live-outs in early exit loops.
; Test load in header block only used as live-out (dereferenceable).
;
; Both %A and %B are marked dereferenceable(1024). The loop compares A[iv]
; with B[iv] and leaves through the early exit at the first mismatch,
; returning the byte that was loaded from A. If the latch exit is taken
; instead (%iv.next reaches 67) the function returns 0.
;
; The assertions below expect a <4 x i8> vector body that steps the index by 4
; until it reaches 64, an early-exit block that extracts the first mismatching
; lane from the wide load of A, and a scalar remainder loop starting at 64.
;
; NOTE(review): the title says the load is "only used as live-out", but %ld1
; also feeds %cmp3 below - confirm the intended wording.
define i8 @header_load_live_out_deref(ptr dereferenceable(1024) %A, ptr dereferenceable(1024) %B) {
; CHECK-LABEL: define i8 @header_load_live_out_deref(
; CHECK-SAME: ptr dereferenceable(1024) [[A:%.*]], ptr dereferenceable(1024) [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
; CHECK-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 64, %[[LOOP_END]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_LATCH]], label %[[LOOP_END1]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 67
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[LOOP_END1]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], %[[LOOP_HEADER]] ], [ 0, %[[LOOP_LATCH]] ], [ [[TMP6]], %[[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i8 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
; Induction variable: starts at 0 and is advanced by one in the latch.
%iv = phi i64 [ %iv.next, %loop.latch ], [ 0, %entry ]
%gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; %ld1 is the header load under test: it feeds the exit compare and is the
; value carried out of the loop on the early exit.
%ld1 = load i8, ptr %gep.A, align 1
%gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
%ld2 = load i8, ptr %gep.B, align 1
; Early exit: continue to the latch while the bytes match, otherwise leave.
%cmp3 = icmp eq i8 %ld1, %ld2
br i1 %cmp3, label %loop.latch, label %loop.end
loop.latch:
%iv.next = add i64 %iv, 1
; Counted exit: leave the loop once %iv.next equals 67.
%ec = icmp ne i64 %iv.next, 67
br i1 %ec, label %loop.header, label %loop.end
loop.end:
; Result is A[iv] when the early exit fired, 0 when the latch exit was taken.
%retval = phi i8 [ %ld1, %loop.header ], [ 0, %loop.latch ]
ret i8 %retval
}
; Test load in latch block only used as live-out (dereferenceable).
;
; Same compare loop as a header-load test would use (A[iv] vs. B[iv], early
; exit on the first mismatch, latch exit when %iv.next reaches 67), with one
; extra load in the latch: %ld3 = A[iv + 1]. %ld3 has no user inside the loop;
; it is only the value returned when the latch exit is taken. Both pointers
; are marked dereferenceable(1024).
;
; The assertions below expect a <4 x i8> vector body containing only the two
; wide loads needed for the compare. The load of %ld3 appears only in the
; scalar remainder loop, which starts at index 64.
define i8 @latch_load_live_out_deref(ptr dereferenceable(1024) %A, ptr dereferenceable(1024) %B) {
; CHECK-LABEL: define i8 @latch_load_live_out_deref(
; CHECK-SAME: ptr dereferenceable(1024) [[A:%.*]], ptr dereferenceable(1024) [[B:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT4:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
; CHECK-NEXT: [[INDEX_NEXT4]] = add nuw i64 [[OFFSET_IDX]], 4
; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP2]]
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT4]], 64
; CHECK-NEXT: br i1 [[TMP6]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[VECTOR_BODY_INTERIM]]
; CHECK: [[VECTOR_BODY_INTERIM]]:
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br label %[[LOOP_END:.*]]
; CHECK: [[VECTOR_EARLY_EXIT]]:
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false)
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i8> [[WIDE_LOAD]], i64 [[FIRST_ACTIVE_LANE]]
; CHECK-NEXT: br label %[[LOOP_END1:.*]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 64, %[[LOOP_END]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP_A]], align 1
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[IV]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[GEP_B]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_LATCH]], label %[[LOOP_END1]]
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[GEP_A_NEXT:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV_NEXT]]
; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[GEP_A_NEXT]], align 1
; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 67
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[LOOP_END1]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[LOOP_END1]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], %[[LOOP_HEADER]] ], [ [[LD3]], %[[LOOP_LATCH]] ], [ [[TMP9]], %[[VECTOR_EARLY_EXIT]] ]
; CHECK-NEXT: ret i8 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
; Induction variable: starts at 0 and is advanced by one in the latch.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; %ld1 feeds the exit compare and is the value returned on the early exit.
%ld1 = load i8, ptr %gep.A, align 1
%gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
%ld2 = load i8, ptr %gep.B, align 1
; Early exit: continue to the latch while the bytes match, otherwise leave.
%cmp3 = icmp eq i8 %ld1, %ld2
br i1 %cmp3, label %loop.latch, label %loop.end
loop.latch:
%iv.next = add i64 %iv, 1
; Latch load under test: reads A[iv + 1] and is referenced only by the exit
; phi in %loop.end.
%gep.A.next = getelementptr inbounds i8, ptr %A, i64 %iv.next
%ld3 = load i8, ptr %gep.A.next, align 1
; Counted exit: leave the loop once %iv.next equals 67.
%ec = icmp ne i64 %iv.next, 67
br i1 %ec, label %loop.header, label %loop.end
loop.end:
; Result is A[iv] when the early exit fired, A[iv + 1] on the latch exit.
%retval = phi i8 [ %ld1, %loop.header ], [ %ld3, %loop.latch ]
ret i8 %retval
}
; Test load in header block only used as live-out (unknown ptr, needs speculative load).
;
; %A and %B carry no dereferenceable attribute here. The loop compares A[iv]
; with B[iv], leaves early at the first mismatch returning the byte loaded
; from A, and returns 0 if the latch exit is taken (%iv.next reaches 67).
;
; The assertions below match only the scalar loop (entry block branching
; straight into it), i.e. no vector body is expected for this function.
define i8 @header_load_live_out_unknown(ptr %A, ptr %B) {
; CHECK-LABEL: define i8 @header_load_live_out_unknown(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[SCALAR_PH:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
; CHECK: [[LOOP_INC]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], %[[LOOP]] ], [ 0, %[[LOOP_INC]] ]
; CHECK-NEXT: ret i8 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
; Induction variable: starts at 0 and is advanced by one in the latch.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; %ld1 is the header load under test: it feeds the exit compare and is the
; value carried out of the loop on the early exit.
%ld1 = load i8, ptr %gep.A, align 1
%gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
%ld2 = load i8, ptr %gep.B, align 1
; Early exit: continue to the latch while the bytes match, otherwise leave.
%cmp3 = icmp eq i8 %ld1, %ld2
br i1 %cmp3, label %loop.latch, label %loop.end
loop.latch:
%iv.next = add i64 %iv, 1
; Counted exit: leave the loop once %iv.next equals 67.
%ec = icmp ne i64 %iv.next, 67
br i1 %ec, label %loop.header, label %loop.end
loop.end:
; Result is A[iv] when the early exit fired, 0 when the latch exit was taken.
%retval = phi i8 [ %ld1, %loop.header ], [ 0, %loop.latch ]
ret i8 %retval
}
; Test load in latch block only used as live-out (unknown ptr, needs speculative load).
;
; %A and %B carry no dereferenceable attribute here. The loop compares A[iv]
; with B[iv] and leaves early at the first mismatch, returning the byte loaded
; from A. The latch additionally loads %ld3 = A[iv + 1], which has no user
; inside the loop and is only the value returned when the latch exit is taken.
; Note that the latch exit bound in this function is 100, not the 67 used by
; the three preceding tests.
;
; The assertions below match only the scalar loop (entry block branching
; straight into it), i.e. no vector body is expected for this function.
define i8 @latch_load_live_out_unknown(ptr %A, ptr %B) {
; CHECK-LABEL: define i8 @latch_load_live_out_unknown(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) {
; CHECK-NEXT: [[SCALAR_PH:.*]]:
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[LOOP_INC:.*]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]]
; CHECK-NEXT: br i1 [[CMP3]], label %[[LOOP_INC]], label %[[LOOP_END:.*]]
; CHECK: [[LOOP_INC]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX_NEXT]]
; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[LOOP]], label %[[LOOP_END]]
; CHECK: [[LOOP_END]]:
; CHECK-NEXT: [[RETVAL:%.*]] = phi i8 [ [[LD1]], %[[LOOP]] ], [ [[LD3]], %[[LOOP_INC]] ]
; CHECK-NEXT: ret i8 [[RETVAL]]
;
entry:
br label %loop.header
loop.header:
; Induction variable: starts at 0 and is advanced by one in the latch.
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
%gep.A = getelementptr inbounds i8, ptr %A, i64 %iv
; %ld1 feeds the exit compare and is the value returned on the early exit.
%ld1 = load i8, ptr %gep.A, align 1
%gep.B = getelementptr inbounds i8, ptr %B, i64 %iv
%ld2 = load i8, ptr %gep.B, align 1
; Early exit: continue to the latch while the bytes match, otherwise leave.
%cmp3 = icmp eq i8 %ld1, %ld2
br i1 %cmp3, label %loop.latch, label %loop.end
loop.latch:
%iv.next = add i64 %iv, 1
; Latch load under test: reads A[iv + 1] and is referenced only by the exit
; phi in %loop.end.
%gep.A.next = getelementptr inbounds i8, ptr %A, i64 %iv.next
%ld3 = load i8, ptr %gep.A.next, align 1
; Counted exit: leave the loop once %iv.next equals 100.
%ec = icmp ne i64 %iv.next, 100
br i1 %ec, label %loop.header, label %loop.end
loop.end:
; Result is A[iv] when the early exit fired, A[iv + 1] on the latch exit.
%retval = phi i8 [ %ld1, %loop.header ], [ %ld3, %loop.latch ]
ret i8 %retval
}