; blob: d6bb9bda4bba808fea936f226d6aea57ca9c273e [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -scoped-noalias-aa -slp-vectorizer -mtriple=arm64-apple-darwin -enable-new-pm=false -S %s | FileCheck %s
; RUN: opt -aa-pipeline='basic-aa,scoped-noalias-aa' -passes=slp-vectorizer -mtriple=arm64-apple-darwin -S %s | FileCheck %s
; Two consecutive i32 elements are loaded from %src, arithmetically shifted
; right by 16 and stored to the matching slot of %dst. The store for lane 0 is
; placed before the load for lane 1, and neither pointer argument carries an
; aliasing attribute. The expected output is the input unchanged, i.e. the
; scalar code is not vectorized.
; NOTE(review): the name suggests runtime alias checks (versioning) would be
; required here and are judged not worthwhile for two lanes -- confirm.
define void @needs_versioning_not_profitable(i32* %dst, i32* %src) {
; CHECK-LABEL: @needs_versioning_not_profitable(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: ret void
;
entry:
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
ret void
}
; Four-lane variant of the load / ashr-by-16 / store chain from %src to %dst.
; Each lane's store precedes the next lane's load, and the pointer arguments
; have no aliasing attributes. The expected output is still the unchanged
; scalar sequence.
; NOTE(review): the name says "profitable" while the assertions expect no
; vectorization; presumably versioning is not performed by the pass under
; test -- confirm.
define void @needs_versioning_profitable(i32* %dst, i32* %src) {
; CHECK-LABEL: @needs_versioning_profitable(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
; CHECK-NEXT: [[SRC_2:%.*]] = load i32, i32* [[SRC_GEP_2]], align 4
; CHECK-NEXT: [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
; CHECK-NEXT: store i32 [[R_2]], i32* [[DST_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
; CHECK-NEXT: [[SRC_3:%.*]] = load i32, i32* [[SRC_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
; CHECK-NEXT: store i32 [[R_3]], i32* [[DST_GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
%src.gep.2 = getelementptr inbounds i32, i32* %src, i64 2
%src.2 = load i32, i32* %src.gep.2, align 4
%r.2 = ashr i32 %src.2, 16
%dst.gep.2 = getelementptr inbounds i32, i32* %dst, i64 2
store i32 %r.2, i32* %dst.gep.2, align 4
%src.gep.3 = getelementptr inbounds i32, i32* %src, i64 3
%src.3 = load i32, i32* %src.gep.3, align 4
%r.3 = ashr i32 %src.3, 16
%dst.gep.3 = getelementptr inbounds i32, i32* %dst, i64 3
store i32 %r.3, i32* %dst.gep.3, align 4
ret void
}
; Four lanes, each reading one i32 from %A and one from %B, adding them,
; multiplying the sum by 2 and storing the product to %dst. Stores to %dst are
; interleaved with the loads of the following lane, and none of the three
; pointer arguments has an aliasing attribute. The expected output is the
; unchanged scalar sequence.
; NOTE(review): presumably this covers the case where the destination would
; have to be checked against two separate source pointers -- confirm.
define void @needs_versioning_profitable_2_sources(i32* %dst, i32* %A, i32* %B) {
; CHECK-LABEL: @needs_versioning_profitable_2_sources(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A_0:%.*]] = load i32, i32* [[A:%.*]], align 4
; CHECK-NEXT: [[B_0:%.*]] = load i32, i32* [[B:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = add i32 [[A_0]], [[B_0]]
; CHECK-NEXT: [[MUL_0:%.*]] = mul i32 [[R_0]], 2
; CHECK-NEXT: store i32 [[MUL_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
; CHECK-NEXT: [[A_1:%.*]] = load i32, i32* [[A_GEP_1]], align 4
; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 1
; CHECK-NEXT: [[B_1:%.*]] = load i32, i32* [[B_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = add i32 [[A_1]], [[B_1]]
; CHECK-NEXT: [[MUL_1:%.*]] = mul i32 [[R_1]], 2
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[MUL_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
; CHECK-NEXT: [[A_2:%.*]] = load i32, i32* [[A_GEP_2]], align 4
; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 2
; CHECK-NEXT: [[B_2:%.*]] = load i32, i32* [[B_GEP_2]], align 4
; CHECK-NEXT: [[R_2:%.*]] = add i32 [[A_2]], [[B_2]]
; CHECK-NEXT: [[MUL_2:%.*]] = mul i32 [[R_2]], 2
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
; CHECK-NEXT: store i32 [[MUL_2]], i32* [[DST_GEP_2]], align 4
; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT: [[A_3:%.*]] = load i32, i32* [[A_GEP_3]], align 4
; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 3
; CHECK-NEXT: [[B_3:%.*]] = load i32, i32* [[B_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = add i32 [[A_3]], [[B_3]]
; CHECK-NEXT: [[MUL_3:%.*]] = mul i32 [[R_3]], 2
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
; CHECK-NEXT: store i32 [[MUL_3]], i32* [[DST_GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
%A.0 = load i32, i32* %A, align 4
%B.0 = load i32, i32* %B, align 4
%r.0 = add i32 %A.0, %B.0
%mul.0 = mul i32 %r.0, 2
store i32 %mul.0, i32* %dst, align 4
%A.gep.1 = getelementptr inbounds i32, i32* %A, i64 1
%A.1 = load i32, i32* %A.gep.1, align 4
%B.gep.1 = getelementptr inbounds i32, i32* %B, i64 1
%B.1 = load i32, i32* %B.gep.1, align 4
%r.1 = add i32 %A.1, %B.1
%mul.1 = mul i32 %r.1, 2
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %mul.1, i32* %dst.gep.1, align 4
%A.gep.2 = getelementptr inbounds i32, i32* %A, i64 2
%A.2 = load i32, i32* %A.gep.2, align 4
%B.gep.2 = getelementptr inbounds i32, i32* %B, i64 2
%B.2 = load i32, i32* %B.gep.2, align 4
%r.2 = add i32 %A.2, %B.2
%mul.2 = mul i32 %r.2, 2
%dst.gep.2 = getelementptr inbounds i32, i32* %dst, i64 2
store i32 %mul.2, i32* %dst.gep.2, align 4
%A.gep.3 = getelementptr inbounds i32, i32* %A, i64 3
%A.3 = load i32, i32* %A.gep.3, align 4
%B.gep.3 = getelementptr inbounds i32, i32* %B, i64 3
%B.3 = load i32, i32* %B.gep.3, align 4
%r.3 = add i32 %A.3, %B.3
%mul.3 = mul i32 %r.3, 2
%dst.gep.3 = getelementptr inbounds i32, i32* %dst, i64 3
store i32 %mul.3, i32* %dst.gep.3, align 4
ret void
}
; External helpers with no body in this file. @use takes a single i32 and is
; called by tests that need a loaded value to stay live; @bar takes no
; arguments and is called around the memory-access chain in
; @needs_versioning_profitable_split_points. Neither declaration carries any
; function attributes.
declare void @use(i32)
declare void @bar()
; Same four-lane load / ashr-by-16 / store chain from %src to %dst as
; @needs_versioning_profitable, but surrounded by calls to the external
; function @bar: three before the chain and one after it. The expected output
; keeps the calls in place and leaves the chain scalar.
; NOTE(review): the calls presumably act as boundaries ("split points") for the
; region that would be versioned -- confirm.
define void @needs_versioning_profitable_split_points(i32* %dst, i32* %src) {
; CHECK-LABEL: @needs_versioning_profitable_split_points(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
; CHECK-NEXT: [[SRC_2:%.*]] = load i32, i32* [[SRC_GEP_2]], align 4
; CHECK-NEXT: [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
; CHECK-NEXT: store i32 [[R_2]], i32* [[DST_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
; CHECK-NEXT: [[SRC_3:%.*]] = load i32, i32* [[SRC_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
; CHECK-NEXT: store i32 [[R_3]], i32* [[DST_GEP_3]], align 4
; CHECK-NEXT: call void @bar()
; CHECK-NEXT: ret void
;
entry:
call void @bar()
call void @bar()
call void @bar()
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
%src.gep.2 = getelementptr inbounds i32, i32* %src, i64 2
%src.2 = load i32, i32* %src.gep.2, align 4
%r.2 = ashr i32 %src.2, 16
%dst.gep.2 = getelementptr inbounds i32, i32* %dst, i64 2
store i32 %r.2, i32* %dst.gep.2, align 4
%src.gep.3 = getelementptr inbounds i32, i32* %src, i64 3
%src.3 = load i32, i32* %src.gep.3, align 4
%r.3 = ashr i32 %src.3, 16
%dst.gep.3 = getelementptr inbounds i32, i32* %dst, i64 3
store i32 %r.3, i32* %dst.gep.3, align 4
call void @bar()
ret void
}
; The four-lane load / ashr-by-16 / store chain lives in block %then, which is
; entered only when %c is true. After the last store, an additional i32 is
; loaded from %src at index 5 and passed to @use. That extra load is written
; without an explicit alignment; the expected output shows it printed with
; align 4. The chain is expected to stay scalar.
; NOTE(review): the extra load presumably sits after the region that would be
; versioned, per the "outside_region" name -- confirm.
define void @needs_versioning_profitable_load_used_outside_region1(i32* %dst, i32* %src, i1 %c) {
; CHECK-LABEL: @needs_versioning_profitable_load_used_outside_region1(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
; CHECK: then:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
; CHECK-NEXT: [[SRC_2:%.*]] = load i32, i32* [[SRC_GEP_2]], align 4
; CHECK-NEXT: [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
; CHECK-NEXT: store i32 [[R_2]], i32* [[DST_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
; CHECK-NEXT: [[SRC_3:%.*]] = load i32, i32* [[SRC_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
; CHECK-NEXT: store i32 [[R_3]], i32* [[DST_GEP_3]], align 4
; CHECK-NEXT: [[SRC_GEP_5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 5
; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[SRC_GEP_5]], align 4
; CHECK-NEXT: call void @use(i32 [[L]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br i1 %c, label %then, label %exit
then:
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
%src.gep.2 = getelementptr inbounds i32, i32* %src, i64 2
%src.2 = load i32, i32* %src.gep.2, align 4
%r.2 = ashr i32 %src.2, 16
%dst.gep.2 = getelementptr inbounds i32, i32* %dst, i64 2
store i32 %r.2, i32* %dst.gep.2, align 4
%src.gep.3 = getelementptr inbounds i32, i32* %src, i64 3
%src.3 = load i32, i32* %src.gep.3, align 4
%r.3 = ashr i32 %src.3, 16
%dst.gep.3 = getelementptr inbounds i32, i32* %dst, i64 3
store i32 %r.3, i32* %dst.gep.3, align 4
%src.gep.5 = getelementptr inbounds i32, i32* %src, i64 5
%l = load i32, i32* %src.gep.5
call void @use(i32 %l)
br label %exit
exit:
ret void
}
; Variant of @needs_versioning_profitable_load_used_outside_region1 in which
; the additional load from %src index 5 is placed in the middle of the chain
; (between the load of lane 2 and its ashr) instead of after the last store.
; Its result %l is only consumed by the call to @use at the end of %then. The
; expected output keeps every instruction in its original position and leaves
; the chain scalar.
define void @needs_versioning_profitable_load_used_outside_region2(i32* %dst, i32* %src, i1 %c) {
; CHECK-LABEL: @needs_versioning_profitable_load_used_outside_region2(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C:%.*]], label [[THEN:%.*]], label [[EXIT:%.*]]
; CHECK: then:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
; CHECK-NEXT: [[SRC_2:%.*]] = load i32, i32* [[SRC_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_5:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 5
; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[SRC_GEP_5]], align 4
; CHECK-NEXT: [[R_2:%.*]] = ashr i32 [[SRC_2]], 16
; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
; CHECK-NEXT: store i32 [[R_2]], i32* [[DST_GEP_2]], align 4
; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
; CHECK-NEXT: [[SRC_3:%.*]] = load i32, i32* [[SRC_GEP_3]], align 4
; CHECK-NEXT: [[R_3:%.*]] = ashr i32 [[SRC_3]], 16
; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
; CHECK-NEXT: store i32 [[R_3]], i32* [[DST_GEP_3]], align 4
; CHECK-NEXT: call void @use(i32 [[L]])
; CHECK-NEXT: br label [[EXIT]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
br i1 %c, label %then, label %exit
then:
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
%src.gep.2 = getelementptr inbounds i32, i32* %src, i64 2
%src.2 = load i32, i32* %src.gep.2, align 4
%src.gep.5 = getelementptr inbounds i32, i32* %src, i64 5
%l = load i32, i32* %src.gep.5
%r.2 = ashr i32 %src.2, 16
%dst.gep.2 = getelementptr inbounds i32, i32* %dst, i64 2
store i32 %r.2, i32* %dst.gep.2, align 4
%src.gep.3 = getelementptr inbounds i32, i32* %src, i64 3
%src.3 = load i32, i32* %src.gep.3, align 4
%r.3 = ashr i32 %src.3, 16
%dst.gep.3 = getelementptr inbounds i32, i32* %dst, i64 3
store i32 %r.3, i32* %dst.gep.3, align 4
call void @use(i32 %l)
br label %exit
exit:
ret void
}
; Two-lane case where both loads from %src are issued before either store to
; %dst, so no store sits between the loads. The expected output is vectorized:
; one <2 x i32> load, one <2 x i32> ashr by 16 and one <2 x i32> store, with no
; extra blocks or compares in the output.
; Both pointers are nocapture and %src is additionally readonly.
define void @no_version(i32* nocapture %dst, i32* nocapture readonly %src) {
; CHECK-LABEL: @no_version(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <2 x i32>*
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i32> [[TMP1]], <i32 16, i32 16>
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4
; CHECK-NEXT: ret void
;
entry:
%src.0 = load i32, i32* %src, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.0 = ashr i32 %src.0, 16
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.0, i32* %dst, align 4
store i32 %r.1, i32* %dst.gep.1, align 4
ret void
}
; Four lanes of an in-place xor: each lane loads an i32 from %counter and one
; from %out_block at the same index, xors them and stores the result back to
; the same %out_block slot. Every lane's store comes before the next lane's
; loads. Values are unnamed in the input (%0..%7) and appear as TMP0..TMP7 in
; the assertions. The expected output is the unchanged scalar sequence.
; NOTE(review): the name suggests several accesses would need runtime checks
; against each other -- confirm.
define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly %counter) {
; CHECK-LABEL: @version_multiple(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[COUNTER:%.*]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[OUT_BLOCK:%.*]], align 4
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], [[TMP0]]
; CHECK-NEXT: store i32 [[XOR]], i32* [[OUT_BLOCK]], align 4
; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
; CHECK-NEXT: [[XOR_1:%.*]] = xor i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: store i32 [[XOR_1]], i32* [[ARRAYIDX2_1]], align 4
; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4
; CHECK-NEXT: [[XOR_2:%.*]] = xor i32 [[TMP5]], [[TMP4]]
; CHECK-NEXT: store i32 [[XOR_2]], i32* [[ARRAYIDX2_2]], align 4
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4
; CHECK-NEXT: [[XOR_3:%.*]] = xor i32 [[TMP7]], [[TMP6]]
; CHECK-NEXT: store i32 [[XOR_3]], i32* [[ARRAYIDX2_3]], align 4
; CHECK-NEXT: ret void
;
entry:
%0 = load i32, i32* %counter, align 4
%1 = load i32, i32* %out_block, align 4
%xor = xor i32 %1, %0
store i32 %xor, i32* %out_block, align 4
%arrayidx.1 = getelementptr inbounds i32, i32* %counter, i64 1
%2 = load i32, i32* %arrayidx.1, align 4
%arrayidx2.1 = getelementptr inbounds i32, i32* %out_block, i64 1
%3 = load i32, i32* %arrayidx2.1, align 4
%xor.1 = xor i32 %3, %2
store i32 %xor.1, i32* %arrayidx2.1, align 4
%arrayidx.2 = getelementptr inbounds i32, i32* %counter, i64 2
%4 = load i32, i32* %arrayidx.2, align 4
%arrayidx2.2 = getelementptr inbounds i32, i32* %out_block, i64 2
%5 = load i32, i32* %arrayidx2.2, align 4
%xor.2 = xor i32 %5, %4
store i32 %xor.2, i32* %arrayidx2.2, align 4
%arrayidx.3 = getelementptr inbounds i32, i32* %counter, i64 3
%6 = load i32, i32* %arrayidx.3, align 4
%arrayidx2.3 = getelementptr inbounds i32, i32* %out_block, i64 3
%7 = load i32, i32* %arrayidx2.3, align 4
%xor.3 = xor i32 %7, %6
store i32 %xor.3, i32* %arrayidx2.3, align 4
ret void
}
; Two-lane load / ashr-by-16 / store chain in the entry block, followed by an
; unconditional branch to %exit, which returns %r.0. A value computed inside
; the chain is therefore used in a different basic block. The %c.1 parameter
; is not referenced in the body. The expected output is the input unchanged.
define i32 @use_outside_version_bb(i32* %dst, i32* %src, i1 %c.1) {
; CHECK-LABEL: @use_outside_version_bb(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret i32 [[R_0]]
;
entry:
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
br label %exit
exit:
ret i32 %r.0
}
; Two-lane load / ashr-by-16 / store chain followed, in the same block, by
; %add = %x + 20, which is the function's return value. The returned value does
; not depend on any of the memory operations. The expected output is the input
; unchanged.
define i32 @value_used_in_return(i32* %dst, i32* %src, i32 %x) {
; CHECK-LABEL: @value_used_in_return(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], 20
; CHECK-NEXT: ret i32 [[ADD]]
;
entry:
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
%add = add i32 %x, 20
ret i32 %add
}
; The entry block only branches on %c.1. Block %then holds the two-lane
; load / ashr-by-16 / store chain and returns the constant 10; block %else
; returns 0 without touching memory. The chain therefore sits in a block that
; is reached through a conditional branch and ends in its own return. The
; expected output is the input unchanged.
define i32 @needs_versioning2_cond_br(i32* %dst, i32* %src, i1 %c.1) {
; CHECK-LABEL: @needs_versioning2_cond_br(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC:%.*]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: ret i32 10
; CHECK: else:
; CHECK-NEXT: ret i32 0
;
entry:
br i1 %c.1, label %then, label %else
then:
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
ret i32 10
else:
ret i32 0
}
; The source pointer %src is not a function argument: it is loaded from
; %src.p at the top of the same block that contains the two-lane
; load / ashr-by-16 / store chain. The pointer load has no explicit alignment
; in the input and is printed with align 8 in the expected output. The chain
; is expected to stay scalar.
; NOTE(review): presumably this covers a pointer that is only defined inside
; the block that would have to be guarded by runtime checks -- confirm.
define void @pointer_defined_in_bb(i32* %dst, i32** %src.p) {
; CHECK-LABEL: @pointer_defined_in_bb(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC:%.*]] = load i32*, i32** [[SRC_P:%.*]], align 8
; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC]], align 4
; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
; CHECK-NEXT: store i32 [[R_0]], i32* [[DST:%.*]], align 4
; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
; CHECK-NEXT: ret void
;
entry:
%src = load i32*, i32** %src.p
%src.0 = load i32, i32* %src, align 4
%r.0 = ashr i32 %src.0, 16
store i32 %r.0, i32* %dst, align 4
%src.gep.1 = getelementptr inbounds i32, i32* %src, i64 1
%src.1 = load i32, i32* %src.gep.1, align 4
%r.1 = ashr i32 %src.1, 16
%dst.gep.1 = getelementptr inbounds i32, i32* %dst, i64 1
store i32 %r.1, i32* %dst.gep.1, align 4
ret void
}
; Stores the constants 10 and 20 to adjacent i32 slots (indices 3 and 4) of the
; same base pointer %this. Between the two stores there is a tail call to the
; external function @clobber and a load from slot 4 whose result %l2 is never
; used. The expected output is the input unchanged, so the two stores are not
; combined into a vector store.
define void @clobber_same_underlying_object(i32* %this) {
; CHECK-LABEL: @clobber_same_underlying_object(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[P_3:%.*]] = getelementptr inbounds i32, i32* [[THIS:%.*]], i32 3
; CHECK-NEXT: store i32 10, i32* [[P_3]], align 8
; CHECK-NEXT: tail call void @clobber()
; CHECK-NEXT: [[P_4:%.*]] = getelementptr inbounds i32, i32* [[THIS]], i32 4
; CHECK-NEXT: [[L2:%.*]] = load i32, i32* [[P_4]], align 8
; CHECK-NEXT: store i32 20, i32* [[P_4]], align 8
; CHECK-NEXT: ret void
;
entry:
%p.3 = getelementptr inbounds i32, i32* %this, i32 3
store i32 10, i32* %p.3, align 8
tail call void @clobber()
%p.4 = getelementptr inbounds i32, i32* %this, i32 4
%l2 = load i32, i32* %p.4, align 8
store i32 20, i32* %p.4, align 8
ret void
}
; External function with no body in this file and no attributes; called by
; @clobber_same_underlying_object between its two stores.
declare void @clobber()
; Stores the constant 0 to %A[4], then loads %B[4] and stores the loaded value
; to %A[5]. The two stores target adjacent i32 slots of %A, but one stores a
; constant and the other a value loaded from a different pointer. The expected
; output is the input unchanged.
; NOTE(review): per the name, vectorizing the two stores is presumably
; rejected on cost grounds -- confirm.
define void @slp_not_beneficial(i32* %A, i32* %B) {
; CHECK-LABEL: @slp_not_beneficial(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 4
; CHECK-NEXT: store i32 0, i32* [[TMP]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 5
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 4
; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 8
; CHECK-NEXT: store i32 [[TMP5]], i32* [[TMP3]], align 8
; CHECK-NEXT: ret void
;
bb:
%tmp = getelementptr inbounds i32, i32* %A, i32 4
store i32 0, i32* %tmp, align 8
%tmp3 = getelementptr inbounds i32, i32* %A, i32 5
%tmp4 = getelementptr inbounds i32, i32* %B, i32 4
%tmp5 = load i32, i32* %tmp4, align 8
store i32 %tmp5, i32* %tmp3, align 8
ret void
}
; Two read-modify-write lanes on adjacent doubles %ptr[0] and %ptr[1]. Each
; lane adds a product to the current slot value and stores it back; the first
; product uses a double loaded from a null pointer, the second a double loaded
; from %ptr.2[0], and both multiply by undef. The block then branches to
; %bb15, which loops on itself forever, so the function never returns. The
; expected output is the input unchanged.
; NOTE(review): the null load, undef operands and infinite loop look like the
; result of automatic test-case reduction -- confirm.
define void @widget(double* %ptr, double* %ptr.2) {
; CHECK-LABEL: @widget(
; CHECK-NEXT: bb1:
; CHECK-NEXT: [[TMP3:%.*]] = load double, double* null, align 8
; CHECK-NEXT: [[TMP4:%.*]] = fmul double undef, [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i32 0
; CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = fadd double [[TMP6]], [[TMP4]]
; CHECK-NEXT: store double [[TMP7]], double* [[TMP5]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds double, double* [[PTR_2:%.*]], i64 0
; CHECK-NEXT: [[TMP9:%.*]] = load double, double* [[TMP8]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = fmul double undef, [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, double* [[PTR]], i32 1
; CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = fadd double [[TMP12]], [[TMP10]]
; CHECK-NEXT: store double [[TMP13]], double* [[TMP11]], align 8
; CHECK-NEXT: br label [[BB15:%.*]]
; CHECK: bb15:
; CHECK-NEXT: br label [[BB15]]
;
bb1: ; entry block (no predecessors)
%tmp3 = load double, double* null, align 8
%tmp4 = fmul double undef, %tmp3
%tmp5 = getelementptr inbounds double, double* %ptr, i32 0
%tmp6 = load double, double* %tmp5, align 8
%tmp7 = fadd double %tmp6, %tmp4
store double %tmp7, double* %tmp5, align 8
%tmp8 = getelementptr inbounds double, double* %ptr.2, i64 0
%tmp9 = load double, double* %tmp8, align 8
%tmp10 = fmul double undef, %tmp9
%tmp11 = getelementptr inbounds double, double* %ptr, i32 1
%tmp12 = load double, double* %tmp11, align 8
%tmp13 = fadd double %tmp12, %tmp10
store double %tmp13, double* %tmp11, align 8
br label %bb15
bb15: ; preds = %bb15, %bb1
br label %bb15
}
; Aggregate with two i32 fields followed by two float fields; the pointee type
; of %A in @test_bounds_removed_before_runtime_checks.
%struct = type { i32, i32, float, float }
; Some points we collected as candidates for runtime checks have been removed
; before generating runtime checks. Make sure versioning is skipped.
;
; The entry block derives two i32 values from constant-only fmul / fptosi /
; icmp / select chains and stores them to fields 0 and 1 of %A; the expected
; output shows these as a single <2 x i32> store of <10, 300>, with the scalar
; chains gone. It then loads an i32* from %B and branches on %c. Block %bb14
; stores 0.0 to float fields 2 and 3 of %A with an i8 load (from an address
; derived from the loaded i32*) between the two stores; those stores are
; expected to stay scalar and no additional blocks are expected.
define void @test_bounds_removed_before_runtime_checks(%struct * %A, i32** %B, i1 %c) {
; CHECK-LABEL: @test_bounds_removed_before_runtime_checks(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT:%.*]], %struct* [[A:%.*]], i64 0, i32 0
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT]], %struct* [[A]], i64 0, i32 1
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TMP11]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> <i32 10, i32 300>, <2 x i32>* [[TMP0]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = load i32*, i32** [[B:%.*]], align 8
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB23:%.*]], label [[BB14:%.*]]
; CHECK: bb14:
; CHECK-NEXT: [[TMP15:%.*]] = sext i32 10 to i64
; CHECK-NEXT: [[TMP16:%.*]] = add nsw i64 2, [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[TMP17]] to i8*
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* [[TMP18]], i64 3
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT]], %struct* [[A]], i64 0, i32 2
; CHECK-NEXT: store float 0.000000e+00, float* [[TMP20]], align 8
; CHECK-NEXT: [[TMP21:%.*]] = load i8, i8* [[TMP19]], align 1
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT]], %struct* [[A]], i64 0, i32 3
; CHECK-NEXT: store float 0.000000e+00, float* [[TMP22]], align 4
; CHECK-NEXT: br label [[BB23]]
; CHECK: bb23:
; CHECK-NEXT: ret void
;
entry:
%tmp1 = fmul float 10.0, 20.0
%tmp2 = fptosi float %tmp1 to i32
%tmp3 = fmul float 30.0, 20.0
%tmp4 = fptosi float %tmp3 to i32
%tmp5 = icmp sgt i32 100, %tmp2
%tmp6 = select i1 %tmp5, i32 %tmp2, i32 10
%tmp7 = select i1 false, i32 0, i32 %tmp6
%tmp8 = icmp sgt i32 200, %tmp4
%tmp9 = select i1 %tmp8, i32 %tmp4, i32 300
%tmp10 = select i1 false, i32 0, i32 %tmp9
%tmp11 = getelementptr inbounds %struct, %struct* %A, i64 0, i32 0
store i32 %tmp7, i32* %tmp11, align 8
%tmp12 = getelementptr inbounds %struct, %struct* %A, i64 0, i32 1
store i32 %tmp10, i32* %tmp12, align 4
%tmp13 = load i32*, i32** %B, align 8
br i1 %c, label %bb23, label %bb14
bb14:
%tmp15 = sext i32 %tmp7 to i64
%tmp16 = add nsw i64 2, %tmp15
%tmp17 = getelementptr inbounds i32, i32* %tmp13, i64 %tmp16
%tmp18 = bitcast i32* %tmp17 to i8*
%tmp19 = getelementptr inbounds i8, i8* %tmp18, i64 3
%tmp20 = getelementptr inbounds %struct, %struct* %A, i64 0, i32 2
store float 0.0, float* %tmp20, align 8
%tmp21 = load i8, i8* %tmp19, align 1
%tmp22 = getelementptr inbounds %struct, %struct* %A, i64 0, i32 3
store float 0.0, float* %tmp22, align 4
br label %bb23
bb23:
ret void
}
; In this test there's a single bound, do not generate runtime checks.
;
; The entry block stores %x - 99.0 to %arg[1], loads %arg1[0] and computes
; 1.0 minus that value, then branches to %bb15. Block %bb15 stores two scaled
; values to the adjacent slots %arg[1] and %arg[2]; both stores go through
; %arg and the block contains no load. The expected output is the input
; unchanged, with no added blocks or compares.
define void @single_membound(double* %arg, double* %arg1, double %x) {
; CHECK-LABEL: @single_membound(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP:%.*]] = fsub double [[X:%.*]], 9.900000e+01
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[ARG:%.*]], i64 1
; CHECK-NEXT: store double [[TMP]], double* [[TMP9]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds double, double* [[ARG1:%.*]], i64 0
; CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8
; CHECK-NEXT: [[TMP13:%.*]] = fsub double 1.000000e+00, [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, double* [[ARG]], i64 2
; CHECK-NEXT: br label [[BB15:%.*]]
; CHECK: bb15:
; CHECK-NEXT: [[TMP16:%.*]] = fmul double [[TMP]], 2.000000e+01
; CHECK-NEXT: store double [[TMP16]], double* [[TMP9]], align 8
; CHECK-NEXT: [[TMP17:%.*]] = fmul double [[TMP13]], 3.000000e+01
; CHECK-NEXT: store double [[TMP17]], double* [[TMP14]], align 8
; CHECK-NEXT: ret void
;
entry:
%tmp = fsub double %x, 99.0
%tmp9 = getelementptr inbounds double, double* %arg, i64 1
store double %tmp, double* %tmp9, align 8
%tmp10 = getelementptr inbounds double, double* %arg1, i64 0
%tmp12 = load double, double* %tmp10, align 8
%tmp13 = fsub double 1.0, %tmp12
%tmp14 = getelementptr inbounds double, double* %arg, i64 2
br label %bb15
bb15:
%tmp16 = fmul double %tmp, 20.0
store double %tmp16, double* %tmp9, align 8
%tmp17 = fmul double %tmp13, 30.0
store double %tmp17, double* %tmp14, align 8
ret void
}
%struct.2 = type { [4 x float] }
; Make sure we do not crash when we encounter a SCEVCouldNotCompute.
;
; The store addresses in %exit are based on %ptr.phi, a phi that is defined
; inside %loop (incoming values: %A from %bb, null from the back edge) and is
; used directly in %exit without an intermediate LCSSA phi. The assertions
; below expect the four scalar load/fadd/fmul/store chains to stay scalar.
define void @no_lcssa_phi(%struct.2* %A, float* %B, i1 %c) {
; CHECK-LABEL: @no_lcssa_phi(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[PTR_PHI:%.*]] = phi %struct.2* [ [[A:%.*]], [[BB:%.*]] ], [ null, [[LOOP]] ]
; CHECK-NEXT: br i1 [[C:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 0
; CHECK-NEXT: [[L_0:%.*]] = load float, float* [[B_GEP_0]], align 8
; CHECK-NEXT: [[ADD_0:%.*]] = fadd float [[L_0]], 1.000000e+01
; CHECK-NEXT: [[MUL_0:%.*]] = fmul float [[ADD_0]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_0:%.*]] = getelementptr inbounds [[STRUCT_2:%.*]], %struct.2* [[PTR_PHI]], i64 0, i32 0, i32 0
; CHECK-NEXT: store float [[MUL_0]], float* [[A_GEP_0]], align 8
; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr inbounds float, float* [[B]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load float, float* [[B_GEP_1]], align 8
; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], 1.000000e+01
; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds [[STRUCT_2]], %struct.2* [[PTR_PHI]], i64 0, i32 0, i32 1
; CHECK-NEXT: store float [[MUL_1]], float* [[A_GEP_1]], align 8
; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT: [[L_2:%.*]] = load float, float* [[B_GEP_2]], align 8
; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_2]], 1.000000e+01
; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr inbounds [[STRUCT_2]], %struct.2* [[PTR_PHI]], i64 0, i32 0, i32 2
; CHECK-NEXT: store float [[MUL_2]], float* [[A_GEP_2]], align 8
; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT: [[L_3:%.*]] = load float, float* [[B_GEP_3]], align 8
; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_3]], 1.000000e+01
; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr inbounds [[STRUCT_2]], %struct.2* [[PTR_PHI]], i64 0, i32 0, i32 3
; CHECK-NEXT: store float [[MUL_3]], float* [[A_GEP_3]], align 8
; CHECK-NEXT: ret void
;
bb:
br label %loop
loop:
; Self-loop that exits once %c is true. The phi yields %A on entry from %bb
; and null when reached through the back edge.
%ptr.phi = phi %struct.2* [ %A, %bb ], [ null, %loop ]
br i1 %c, label %exit, label %loop
exit:
; For i = 0..3: load B[i], compute (B[i] + 10.0) * 30.0 and store the result
; to element i of the float array inside the struct addressed by %ptr.phi.
%B.gep.0 = getelementptr inbounds float, float* %B, i64 0
%l.0 = load float, float* %B.gep.0, align 8
%add.0 = fadd float %l.0, 10.0
%mul.0 = fmul float %add.0, 30.0
%A.gep.0 = getelementptr inbounds %struct.2, %struct.2* %ptr.phi, i64 0, i32 0, i32 0
store float %mul.0, float* %A.gep.0, align 8
%B.gep.1 = getelementptr inbounds float, float* %B, i64 1
%l.1 = load float, float* %B.gep.1, align 8
%add.1 = fadd float %l.1, 10.0
%mul.1 = fmul float %add.1, 30.0
%A.gep.1 = getelementptr inbounds %struct.2, %struct.2* %ptr.phi, i64 0, i32 0, i32 1
store float %mul.1, float* %A.gep.1, align 8
%B.gep.2 = getelementptr inbounds float, float* %B, i64 2
%l.2 = load float, float* %B.gep.2, align 8
%add.2 = fadd float %l.2, 10.0
%mul.2 = fmul float %add.2, 30.0
%A.gep.2 = getelementptr inbounds %struct.2, %struct.2* %ptr.phi, i64 0, i32 0, i32 2
store float %mul.2, float* %A.gep.2, align 8
%B.gep.3 = getelementptr inbounds float, float* %B, i64 3
%l.3 = load float, float* %B.gep.3, align 8
%add.3 = fadd float %l.3, 10.0
%mul.3 = fmul float %add.3, 30.0
%A.gep.3 = getelementptr inbounds %struct.2, %struct.2* %ptr.phi, i64 0, i32 0, i32 3
store float %mul.3, float* %A.gep.3, align 8
ret void
}
; Make sure lcssa phis as pointer bases are handled properly.
;
; Same shape as @no_lcssa_phi, except that %exit starts with a
; single-incoming-value phi (%ptr.phi.lcssa) of the loop-defined pointer, and
; all store addresses are based on that phi. The assertions below expect the
; four scalar load/fadd/fmul/store chains to stay scalar.
define void @lcssa_phi(%struct.2* %A, float* %B, i1 %c) {
; CHECK-LABEL: @lcssa_phi(
; CHECK-NEXT: bb:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[PTR_PHI:%.*]] = phi %struct.2* [ [[A:%.*]], [[BB:%.*]] ], [ null, [[LOOP]] ]
; CHECK-NEXT: br i1 [[C:%.*]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: [[PTR_PHI_LCSSA:%.*]] = phi %struct.2* [ [[PTR_PHI]], [[LOOP]] ]
; CHECK-NEXT: [[B_GEP_0:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 0
; CHECK-NEXT: [[L_0:%.*]] = load float, float* [[B_GEP_0]], align 8
; CHECK-NEXT: [[ADD_0:%.*]] = fadd float [[L_0]], 1.000000e+01
; CHECK-NEXT: [[MUL_0:%.*]] = fmul float [[ADD_0]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_0:%.*]] = getelementptr inbounds [[STRUCT_2:%.*]], %struct.2* [[PTR_PHI_LCSSA]], i64 0, i32 0, i32 0
; CHECK-NEXT: store float [[MUL_0]], float* [[A_GEP_0]], align 8
; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr inbounds float, float* [[B]], i64 1
; CHECK-NEXT: [[L_1:%.*]] = load float, float* [[B_GEP_1]], align 8
; CHECK-NEXT: [[ADD_1:%.*]] = fadd float [[L_1]], 1.000000e+01
; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[ADD_1]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr inbounds [[STRUCT_2]], %struct.2* [[PTR_PHI_LCSSA]], i64 0, i32 0, i32 1
; CHECK-NEXT: store float [[MUL_1]], float* [[A_GEP_1]], align 8
; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr inbounds float, float* [[B]], i64 2
; CHECK-NEXT: [[L_2:%.*]] = load float, float* [[B_GEP_2]], align 8
; CHECK-NEXT: [[ADD_2:%.*]] = fadd float [[L_2]], 1.000000e+01
; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[ADD_2]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr inbounds [[STRUCT_2]], %struct.2* [[PTR_PHI_LCSSA]], i64 0, i32 0, i32 2
; CHECK-NEXT: store float [[MUL_2]], float* [[A_GEP_2]], align 8
; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr inbounds float, float* [[B]], i64 3
; CHECK-NEXT: [[L_3:%.*]] = load float, float* [[B_GEP_3]], align 8
; CHECK-NEXT: [[ADD_3:%.*]] = fadd float [[L_3]], 1.000000e+01
; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[ADD_3]], 3.000000e+01
; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr inbounds [[STRUCT_2]], %struct.2* [[PTR_PHI_LCSSA]], i64 0, i32 0, i32 3
; CHECK-NEXT: store float [[MUL_3]], float* [[A_GEP_3]], align 8
; CHECK-NEXT: ret void
;
bb:
br label %loop
loop:
; Self-loop that exits once %c is true. The phi yields %A on entry from %bb
; and null when reached through the back edge.
%ptr.phi = phi %struct.2* [ %A, %bb ], [ null, %loop ]
br i1 %c, label %exit, label %loop
exit:
; LCSSA-style phi: its only incoming value is the loop-defined %ptr.phi.
%ptr.phi.lcssa = phi %struct.2* [ %ptr.phi, %loop ]
; For i = 0..3: load B[i], compute (B[i] + 10.0) * 30.0 and store the result
; to element i of the float array inside the struct addressed by
; %ptr.phi.lcssa.
%B.gep.0 = getelementptr inbounds float, float* %B, i64 0
%l.0 = load float, float* %B.gep.0, align 8
%add.0 = fadd float %l.0, 10.0
%mul.0 = fmul float %add.0, 30.0
%A.gep.0 = getelementptr inbounds %struct.2, %struct.2* %ptr.phi.lcssa, i64 0, i32 0, i32 0
store float %mul.0, float* %A.gep.0, align 8
%B.gep.1 = getelementptr inbounds float, float* %B, i64 1
%l.1 = load float, float* %B.gep.1, align 8
%add.1 = fadd float %l.1, 10.0
%mul.1 = fmul float %add.1, 30.0
%A.gep.1 = getelementptr inbounds %struct.2, %struct.2* %ptr.phi.lcssa, i64 0, i32 0, i32 1
store float %mul.1, float* %A.gep.1, align 8
%B.gep.2 = getelementptr inbounds float, float* %B, i64 2
%l.2 = load float, float* %B.gep.2, align 8
%add.2 = fadd float %l.2, 10.0
%mul.2 = fmul float %add.2, 30.0
%A.gep.2 = getelementptr inbounds %struct.2, %struct.2* %ptr.phi.lcssa, i64 0, i32 0, i32 2
store float %mul.2, float* %A.gep.2, align 8
%B.gep.3 = getelementptr inbounds float, float* %B, i64 3
%l.3 = load float, float* %B.gep.3, align 8
%add.3 = fadd float %l.3, 10.0
%mul.3 = fmul float %add.3, 30.0
%A.gep.3 = getelementptr inbounds %struct.2, %struct.2* %ptr.phi.lcssa, i64 0, i32 0, i32 3
store float %mul.3, float* %A.gep.3, align 8
ret void
}
; Pointee type of the %arg parameter of
; @block_partly_vectorized_without_versioning below.
%struct.spam = type { [60 x i32], i32, [12 x i8] }
; External function taking an i8*; the test below calls it between its two
; groups of byte-wise xor operations.
declare void @foo(i8*)
; Test case with a basic block where parts can be vectorized without versioning.
;
; The single block %bb contains two groups of 16 byte-wise xor operations:
;   1. arg1[i] = A[i] ^ B[i] for i = 0..15, placed before the call to @foo.
;   2. arg2[i] = arg2[i] ^ arg3[i] for i = 0..15, placed after the call.
; The assertions below expect group 1 to be turned into <16 x i8> loads, one
; <16 x i8> xor and one <16 x i8> store, while group 2 is expected to remain
; 16 scalar load/load/xor/store sequences.
define i32 @block_partly_vectorized_without_versioning(%struct.spam* readonly %arg, i8* nocapture readonly %arg1, i8* nocapture %arg2, i8* nocapture readonly %arg3, i8* %A, i8* %B) {
; CHECK-LABEL: @block_partly_vectorized_without_versioning(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[T:%.*]] = alloca <16 x i8>, align 16
; CHECK-NEXT: [[T4:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[T]], i64 0, i64 0
; CHECK-NEXT: [[T5:%.*]] = getelementptr inbounds i8, i8* [[ARG3:%.*]], i64 1
; CHECK-NEXT: [[T6:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 2
; CHECK-NEXT: [[T7:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 3
; CHECK-NEXT: [[T8:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 4
; CHECK-NEXT: [[T9:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 5
; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 6
; CHECK-NEXT: [[T11:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 7
; CHECK-NEXT: [[T12:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 8
; CHECK-NEXT: [[T13:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 9
; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 10
; CHECK-NEXT: [[T15:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 11
; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 12
; CHECK-NEXT: [[T17:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 13
; CHECK-NEXT: [[T18:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 14
; CHECK-NEXT: [[T19:%.*]] = bitcast i8* [[ARG1:%.*]] to <16 x i8>*
; CHECK-NEXT: [[A_GEP_0:%.*]] = getelementptr i8, i8* [[A:%.*]], i64 0
; CHECK-NEXT: [[B_GEP_0:%.*]] = getelementptr i8, i8* [[B:%.*]], i64 0
; CHECK-NEXT: [[A_GEP_1:%.*]] = getelementptr i8, i8* [[A]], i64 1
; CHECK-NEXT: [[B_GEP_1:%.*]] = getelementptr i8, i8* [[B]], i64 1
; CHECK-NEXT: [[A_GEP_2:%.*]] = getelementptr i8, i8* [[A]], i64 2
; CHECK-NEXT: [[B_GEP_2:%.*]] = getelementptr i8, i8* [[B]], i64 2
; CHECK-NEXT: [[A_GEP_3:%.*]] = getelementptr i8, i8* [[A]], i64 3
; CHECK-NEXT: [[B_GEP_3:%.*]] = getelementptr i8, i8* [[B]], i64 3
; CHECK-NEXT: [[A_GEP_4:%.*]] = getelementptr i8, i8* [[A]], i64 4
; CHECK-NEXT: [[B_GEP_4:%.*]] = getelementptr i8, i8* [[B]], i64 4
; CHECK-NEXT: [[A_GEP_5:%.*]] = getelementptr i8, i8* [[A]], i64 5
; CHECK-NEXT: [[B_GEP_5:%.*]] = getelementptr i8, i8* [[B]], i64 5
; CHECK-NEXT: [[A_GEP_6:%.*]] = getelementptr i8, i8* [[A]], i64 6
; CHECK-NEXT: [[B_GEP_6:%.*]] = getelementptr i8, i8* [[B]], i64 6
; CHECK-NEXT: [[A_GEP_7:%.*]] = getelementptr i8, i8* [[A]], i64 7
; CHECK-NEXT: [[B_GEP_7:%.*]] = getelementptr i8, i8* [[B]], i64 7
; CHECK-NEXT: [[A_GEP_8:%.*]] = getelementptr i8, i8* [[A]], i64 8
; CHECK-NEXT: [[B_GEP_8:%.*]] = getelementptr i8, i8* [[B]], i64 8
; CHECK-NEXT: [[A_GEP_9:%.*]] = getelementptr i8, i8* [[A]], i64 9
; CHECK-NEXT: [[B_GEP_9:%.*]] = getelementptr i8, i8* [[B]], i64 9
; CHECK-NEXT: [[A_GEP_10:%.*]] = getelementptr i8, i8* [[A]], i64 10
; CHECK-NEXT: [[B_GEP_10:%.*]] = getelementptr i8, i8* [[B]], i64 10
; CHECK-NEXT: [[A_GEP_11:%.*]] = getelementptr i8, i8* [[A]], i64 11
; CHECK-NEXT: [[B_GEP_11:%.*]] = getelementptr i8, i8* [[B]], i64 11
; CHECK-NEXT: [[A_GEP_12:%.*]] = getelementptr i8, i8* [[A]], i64 12
; CHECK-NEXT: [[B_GEP_12:%.*]] = getelementptr i8, i8* [[B]], i64 12
; CHECK-NEXT: [[A_GEP_13:%.*]] = getelementptr i8, i8* [[A]], i64 13
; CHECK-NEXT: [[B_GEP_13:%.*]] = getelementptr i8, i8* [[B]], i64 13
; CHECK-NEXT: [[A_GEP_14:%.*]] = getelementptr i8, i8* [[A]], i64 14
; CHECK-NEXT: [[B_GEP_14:%.*]] = getelementptr i8, i8* [[B]], i64 14
; CHECK-NEXT: [[A_GEP_15:%.*]] = getelementptr i8, i8* [[A]], i64 15
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[A_GEP_0]] to <16 x i8>*
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1
; CHECK-NEXT: [[B_GEP_15:%.*]] = getelementptr i8, i8* [[B]], i64 15
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[B_GEP_0]] to <16 x i8>*
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[TMP2]], align 1
; CHECK-NEXT: [[TMP4:%.*]] = xor <16 x i8> [[TMP1]], [[TMP3]]
; CHECK-NEXT: [[R_GEP_0:%.*]] = getelementptr i8, i8* [[ARG1]], i64 0
; CHECK-NEXT: [[R_GEP_1:%.*]] = getelementptr i8, i8* [[ARG1]], i64 1
; CHECK-NEXT: [[R_GEP_2:%.*]] = getelementptr i8, i8* [[ARG1]], i64 2
; CHECK-NEXT: [[R_GEP_3:%.*]] = getelementptr i8, i8* [[ARG1]], i64 3
; CHECK-NEXT: [[R_GEP_4:%.*]] = getelementptr i8, i8* [[ARG1]], i64 4
; CHECK-NEXT: [[R_GEP_5:%.*]] = getelementptr i8, i8* [[ARG1]], i64 5
; CHECK-NEXT: [[R_GEP_6:%.*]] = getelementptr i8, i8* [[ARG1]], i64 6
; CHECK-NEXT: [[R_GEP_7:%.*]] = getelementptr i8, i8* [[ARG1]], i64 7
; CHECK-NEXT: [[R_GEP_8:%.*]] = getelementptr i8, i8* [[ARG1]], i64 8
; CHECK-NEXT: [[R_GEP_9:%.*]] = getelementptr i8, i8* [[ARG1]], i64 9
; CHECK-NEXT: [[R_GEP_10:%.*]] = getelementptr i8, i8* [[ARG1]], i64 10
; CHECK-NEXT: [[R_GEP_11:%.*]] = getelementptr i8, i8* [[ARG1]], i64 11
; CHECK-NEXT: [[R_GEP_12:%.*]] = getelementptr i8, i8* [[ARG1]], i64 12
; CHECK-NEXT: [[R_GEP_13:%.*]] = getelementptr i8, i8* [[ARG1]], i64 13
; CHECK-NEXT: [[R_GEP_14:%.*]] = getelementptr i8, i8* [[ARG1]], i64 14
; CHECK-NEXT: [[R_GEP_15:%.*]] = getelementptr i8, i8* [[ARG1]], i64 15
; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[R_GEP_0]] to <16 x i8>*
; CHECK-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* [[TMP5]], align 1
; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds i8, i8* [[ARG3]], i64 15
; CHECK-NEXT: [[T22:%.*]] = bitcast i8* [[ARG3]] to <16 x i8>*
; CHECK-NEXT: call void @foo(i8* nonnull [[T4]])
; CHECK-NEXT: [[T26:%.*]] = load i8, i8* [[ARG3]], align 1
; CHECK-NEXT: [[T27:%.*]] = load i8, i8* [[ARG2:%.*]], align 1
; CHECK-NEXT: [[T28:%.*]] = xor i8 [[T27]], [[T26]]
; CHECK-NEXT: store i8 [[T28]], i8* [[ARG2]], align 1
; CHECK-NEXT: [[T29:%.*]] = load i8, i8* [[T5]], align 1
; CHECK-NEXT: [[T30:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 1
; CHECK-NEXT: [[T31:%.*]] = load i8, i8* [[T30]], align 1
; CHECK-NEXT: [[T32:%.*]] = xor i8 [[T31]], [[T29]]
; CHECK-NEXT: store i8 [[T32]], i8* [[T30]], align 1
; CHECK-NEXT: [[T33:%.*]] = load i8, i8* [[T6]], align 1
; CHECK-NEXT: [[T34:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 2
; CHECK-NEXT: [[T35:%.*]] = load i8, i8* [[T34]], align 1
; CHECK-NEXT: [[T36:%.*]] = xor i8 [[T35]], [[T33]]
; CHECK-NEXT: store i8 [[T36]], i8* [[T34]], align 1
; CHECK-NEXT: [[T37:%.*]] = load i8, i8* [[T7]], align 1
; CHECK-NEXT: [[T38:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 3
; CHECK-NEXT: [[T39:%.*]] = load i8, i8* [[T38]], align 1
; CHECK-NEXT: [[T40:%.*]] = xor i8 [[T39]], [[T37]]
; CHECK-NEXT: store i8 [[T40]], i8* [[T38]], align 1
; CHECK-NEXT: [[T41:%.*]] = load i8, i8* [[T8]], align 1
; CHECK-NEXT: [[T42:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 4
; CHECK-NEXT: [[T43:%.*]] = load i8, i8* [[T42]], align 1
; CHECK-NEXT: [[T44:%.*]] = xor i8 [[T43]], [[T41]]
; CHECK-NEXT: store i8 [[T44]], i8* [[T42]], align 1
; CHECK-NEXT: [[T45:%.*]] = load i8, i8* [[T9]], align 1
; CHECK-NEXT: [[T46:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 5
; CHECK-NEXT: [[T47:%.*]] = load i8, i8* [[T46]], align 1
; CHECK-NEXT: [[T48:%.*]] = xor i8 [[T47]], [[T45]]
; CHECK-NEXT: store i8 [[T48]], i8* [[T46]], align 1
; CHECK-NEXT: [[T49:%.*]] = load i8, i8* [[T10]], align 1
; CHECK-NEXT: [[T50:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 6
; CHECK-NEXT: [[T51:%.*]] = load i8, i8* [[T50]], align 1
; CHECK-NEXT: [[T52:%.*]] = xor i8 [[T51]], [[T49]]
; CHECK-NEXT: store i8 [[T52]], i8* [[T50]], align 1
; CHECK-NEXT: [[T53:%.*]] = load i8, i8* [[T11]], align 1
; CHECK-NEXT: [[T54:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 7
; CHECK-NEXT: [[T55:%.*]] = load i8, i8* [[T54]], align 1
; CHECK-NEXT: [[T56:%.*]] = xor i8 [[T55]], [[T53]]
; CHECK-NEXT: store i8 [[T56]], i8* [[T54]], align 1
; CHECK-NEXT: [[T57:%.*]] = load i8, i8* [[T12]], align 1
; CHECK-NEXT: [[T58:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 8
; CHECK-NEXT: [[T59:%.*]] = load i8, i8* [[T58]], align 1
; CHECK-NEXT: [[T60:%.*]] = xor i8 [[T59]], [[T57]]
; CHECK-NEXT: store i8 [[T60]], i8* [[T58]], align 1
; CHECK-NEXT: [[T61:%.*]] = load i8, i8* [[T13]], align 1
; CHECK-NEXT: [[T62:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 9
; CHECK-NEXT: [[T63:%.*]] = load i8, i8* [[T62]], align 1
; CHECK-NEXT: [[T64:%.*]] = xor i8 [[T63]], [[T61]]
; CHECK-NEXT: store i8 [[T64]], i8* [[T62]], align 1
; CHECK-NEXT: [[T65:%.*]] = load i8, i8* [[T14]], align 1
; CHECK-NEXT: [[T66:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 10
; CHECK-NEXT: [[T67:%.*]] = load i8, i8* [[T66]], align 1
; CHECK-NEXT: [[T68:%.*]] = xor i8 [[T67]], [[T65]]
; CHECK-NEXT: store i8 [[T68]], i8* [[T66]], align 1
; CHECK-NEXT: [[T69:%.*]] = load i8, i8* [[T15]], align 1
; CHECK-NEXT: [[T70:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 11
; CHECK-NEXT: [[T71:%.*]] = load i8, i8* [[T70]], align 1
; CHECK-NEXT: [[T72:%.*]] = xor i8 [[T71]], [[T69]]
; CHECK-NEXT: store i8 [[T72]], i8* [[T70]], align 1
; CHECK-NEXT: [[T73:%.*]] = load i8, i8* [[T16]], align 1
; CHECK-NEXT: [[T74:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 12
; CHECK-NEXT: [[T75:%.*]] = load i8, i8* [[T74]], align 1
; CHECK-NEXT: [[T76:%.*]] = xor i8 [[T75]], [[T73]]
; CHECK-NEXT: store i8 [[T76]], i8* [[T74]], align 1
; CHECK-NEXT: [[T77:%.*]] = load i8, i8* [[T17]], align 1
; CHECK-NEXT: [[T78:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 13
; CHECK-NEXT: [[T79:%.*]] = load i8, i8* [[T78]], align 1
; CHECK-NEXT: [[T80:%.*]] = xor i8 [[T79]], [[T77]]
; CHECK-NEXT: store i8 [[T80]], i8* [[T78]], align 1
; CHECK-NEXT: [[T81:%.*]] = load i8, i8* [[T18]], align 1
; CHECK-NEXT: [[T82:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 14
; CHECK-NEXT: [[T83:%.*]] = load i8, i8* [[T82]], align 1
; CHECK-NEXT: [[T84:%.*]] = xor i8 [[T83]], [[T81]]
; CHECK-NEXT: store i8 [[T84]], i8* [[T82]], align 1
; CHECK-NEXT: [[T85:%.*]] = load i8, i8* [[T21]], align 1
; CHECK-NEXT: [[T86:%.*]] = getelementptr inbounds i8, i8* [[ARG2]], i64 15
; CHECK-NEXT: [[T87:%.*]] = load i8, i8* [[T86]], align 1
; CHECK-NEXT: [[T88:%.*]] = xor i8 [[T87]], [[T85]]
; CHECK-NEXT: store i8 [[T88]], i8* [[T86]], align 1
; CHECK-NEXT: ret i32 1
;
bb:
; Local <16 x i8> slot; %t4 (a pointer to its first byte) is the argument
; passed to @foo further down.
%t = alloca <16 x i8>, align 16
%t4 = getelementptr inbounds <16 x i8>, <16 x i8>* %t, i64 0, i64 0
; Addresses of bytes 1..14 of %arg3, consumed by group 2 after the call.
%t5 = getelementptr inbounds i8, i8* %arg3, i64 1
%t6 = getelementptr inbounds i8, i8* %arg3, i64 2
%t7 = getelementptr inbounds i8, i8* %arg3, i64 3
%t8 = getelementptr inbounds i8, i8* %arg3, i64 4
%t9 = getelementptr inbounds i8, i8* %arg3, i64 5
%t10 = getelementptr inbounds i8, i8* %arg3, i64 6
%t11 = getelementptr inbounds i8, i8* %arg3, i64 7
%t12 = getelementptr inbounds i8, i8* %arg3, i64 8
%t13 = getelementptr inbounds i8, i8* %arg3, i64 9
%t14 = getelementptr inbounds i8, i8* %arg3, i64 10
%t15 = getelementptr inbounds i8, i8* %arg3, i64 11
%t16 = getelementptr inbounds i8, i8* %arg3, i64 12
%t17 = getelementptr inbounds i8, i8* %arg3, i64 13
%t18 = getelementptr inbounds i8, i8* %arg3, i64 14
; %t19 has no users in this function.
%t19 = bitcast i8* %arg1 to <16 x i8>*
; Group 1, loads and xors: %xor.i = A[i] ^ B[i] for i = 0..15.
%A.gep.0 = getelementptr i8, i8* %A, i64 0
%A.0 = load i8, i8* %A.gep.0
%B.gep.0 = getelementptr i8, i8* %B, i64 0
%B.0 = load i8, i8* %B.gep.0
%xor.0 = xor i8 %A.0, %B.0
%A.gep.1 = getelementptr i8, i8* %A, i64 1
%A.1 = load i8, i8* %A.gep.1
%B.gep.1 = getelementptr i8, i8* %B, i64 1
%B.1 = load i8, i8* %B.gep.1
%xor.1 = xor i8 %A.1, %B.1
%A.gep.2 = getelementptr i8, i8* %A, i64 2
%A.2 = load i8, i8* %A.gep.2
%B.gep.2 = getelementptr i8, i8* %B, i64 2
%B.2 = load i8, i8* %B.gep.2
%xor.2 = xor i8 %A.2, %B.2
%A.gep.3 = getelementptr i8, i8* %A, i64 3
%A.3 = load i8, i8* %A.gep.3
%B.gep.3 = getelementptr i8, i8* %B, i64 3
%B.3 = load i8, i8* %B.gep.3
%xor.3 = xor i8 %A.3, %B.3
%A.gep.4 = getelementptr i8, i8* %A, i64 4
%A.4 = load i8, i8* %A.gep.4
%B.gep.4 = getelementptr i8, i8* %B, i64 4
%B.4 = load i8, i8* %B.gep.4
%xor.4 = xor i8 %A.4, %B.4
%A.gep.5 = getelementptr i8, i8* %A, i64 5
%A.5 = load i8, i8* %A.gep.5
%B.gep.5 = getelementptr i8, i8* %B, i64 5
%B.5 = load i8, i8* %B.gep.5
%xor.5 = xor i8 %A.5, %B.5
%A.gep.6 = getelementptr i8, i8* %A, i64 6
%A.6 = load i8, i8* %A.gep.6
%B.gep.6 = getelementptr i8, i8* %B, i64 6
%B.6 = load i8, i8* %B.gep.6
%xor.6 = xor i8 %A.6, %B.6
%A.gep.7 = getelementptr i8, i8* %A, i64 7
%A.7 = load i8, i8* %A.gep.7
%B.gep.7 = getelementptr i8, i8* %B, i64 7
%B.7 = load i8, i8* %B.gep.7
%xor.7 = xor i8 %A.7, %B.7
%A.gep.8 = getelementptr i8, i8* %A, i64 8
%A.8 = load i8, i8* %A.gep.8
%B.gep.8 = getelementptr i8, i8* %B, i64 8
%B.8 = load i8, i8* %B.gep.8
%xor.8 = xor i8 %A.8, %B.8
%A.gep.9 = getelementptr i8, i8* %A, i64 9
%A.9 = load i8, i8* %A.gep.9
%B.gep.9 = getelementptr i8, i8* %B, i64 9
%B.9 = load i8, i8* %B.gep.9
%xor.9 = xor i8 %A.9, %B.9
%A.gep.10 = getelementptr i8, i8* %A, i64 10
%A.10 = load i8, i8* %A.gep.10
%B.gep.10 = getelementptr i8, i8* %B, i64 10
%B.10 = load i8, i8* %B.gep.10
%xor.10 = xor i8 %A.10, %B.10
%A.gep.11 = getelementptr i8, i8* %A, i64 11
%A.11 = load i8, i8* %A.gep.11
%B.gep.11 = getelementptr i8, i8* %B, i64 11
%B.11 = load i8, i8* %B.gep.11
%xor.11 = xor i8 %A.11, %B.11
%A.gep.12 = getelementptr i8, i8* %A, i64 12
%A.12 = load i8, i8* %A.gep.12
%B.gep.12 = getelementptr i8, i8* %B, i64 12
%B.12 = load i8, i8* %B.gep.12
%xor.12 = xor i8 %A.12, %B.12
%A.gep.13 = getelementptr i8, i8* %A, i64 13
%A.13 = load i8, i8* %A.gep.13
%B.gep.13 = getelementptr i8, i8* %B, i64 13
%B.13 = load i8, i8* %B.gep.13
%xor.13 = xor i8 %A.13, %B.13
%A.gep.14 = getelementptr i8, i8* %A, i64 14
%A.14 = load i8, i8* %A.gep.14
%B.gep.14 = getelementptr i8, i8* %B, i64 14
%B.14 = load i8, i8* %B.gep.14
%xor.14 = xor i8 %A.14, %B.14
%A.gep.15 = getelementptr i8, i8* %A, i64 15
%A.15 = load i8, i8* %A.gep.15
%B.gep.15 = getelementptr i8, i8* %B, i64 15
%B.15 = load i8, i8* %B.gep.15
%xor.15 = xor i8 %A.15, %B.15
; Group 1, stores: write %xor.0..%xor.15 to bytes 0..15 of %arg1. All 32
; loads above precede the first of these stores.
%R.gep.0 = getelementptr i8, i8* %arg1, i64 0
store i8 %xor.0, i8* %R.gep.0
%R.gep.1 = getelementptr i8, i8* %arg1, i64 1
store i8 %xor.1, i8* %R.gep.1
%R.gep.2 = getelementptr i8, i8* %arg1, i64 2
store i8 %xor.2, i8* %R.gep.2
%R.gep.3 = getelementptr i8, i8* %arg1, i64 3
store i8 %xor.3, i8* %R.gep.3
%R.gep.4 = getelementptr i8, i8* %arg1, i64 4
store i8 %xor.4, i8* %R.gep.4
%R.gep.5 = getelementptr i8, i8* %arg1, i64 5
store i8 %xor.5, i8* %R.gep.5
%R.gep.6 = getelementptr i8, i8* %arg1, i64 6
store i8 %xor.6, i8* %R.gep.6
%R.gep.7 = getelementptr i8, i8* %arg1, i64 7
store i8 %xor.7, i8* %R.gep.7
%R.gep.8 = getelementptr i8, i8* %arg1, i64 8
store i8 %xor.8, i8* %R.gep.8
%R.gep.9 = getelementptr i8, i8* %arg1, i64 9
store i8 %xor.9, i8* %R.gep.9
%R.gep.10 = getelementptr i8, i8* %arg1, i64 10
store i8 %xor.10, i8* %R.gep.10
%R.gep.11 = getelementptr i8, i8* %arg1, i64 11
store i8 %xor.11, i8* %R.gep.11
%R.gep.12 = getelementptr i8, i8* %arg1, i64 12
store i8 %xor.12, i8* %R.gep.12
%R.gep.13 = getelementptr i8, i8* %arg1, i64 13
store i8 %xor.13, i8* %R.gep.13
%R.gep.14 = getelementptr i8, i8* %arg1, i64 14
store i8 %xor.14, i8* %R.gep.14
%R.gep.15 = getelementptr i8, i8* %arg1, i64 15
store i8 %xor.15, i8* %R.gep.15
%t21 = getelementptr inbounds i8, i8* %arg3, i64 15
; %t22 has no users in this function.
%t22 = bitcast i8* %arg3 to <16 x i8>*
; Call separating the two groups.
call void @foo(i8* nonnull %t4)
; Group 2: arg2[i] = arg2[i] ^ arg3[i] for i = 0..15. Each store to %arg2 is
; followed by the next load from %arg3, so loads and stores interleave.
%t26 = load i8, i8* %arg3, align 1
%t27 = load i8, i8* %arg2, align 1
%t28 = xor i8 %t27, %t26
store i8 %t28, i8* %arg2, align 1
%t29 = load i8, i8* %t5, align 1
%t30 = getelementptr inbounds i8, i8* %arg2, i64 1
%t31 = load i8, i8* %t30, align 1
%t32 = xor i8 %t31, %t29
store i8 %t32, i8* %t30, align 1
%t33 = load i8, i8* %t6, align 1
%t34 = getelementptr inbounds i8, i8* %arg2, i64 2
%t35 = load i8, i8* %t34, align 1
%t36 = xor i8 %t35, %t33
store i8 %t36, i8* %t34, align 1
%t37 = load i8, i8* %t7, align 1
%t38 = getelementptr inbounds i8, i8* %arg2, i64 3
%t39 = load i8, i8* %t38, align 1
%t40 = xor i8 %t39, %t37
store i8 %t40, i8* %t38, align 1
%t41 = load i8, i8* %t8, align 1
%t42 = getelementptr inbounds i8, i8* %arg2, i64 4
%t43 = load i8, i8* %t42, align 1
%t44 = xor i8 %t43, %t41
store i8 %t44, i8* %t42, align 1
%t45 = load i8, i8* %t9, align 1
%t46 = getelementptr inbounds i8, i8* %arg2, i64 5
%t47 = load i8, i8* %t46, align 1
%t48 = xor i8 %t47, %t45
store i8 %t48, i8* %t46, align 1
%t49 = load i8, i8* %t10, align 1
%t50 = getelementptr inbounds i8, i8* %arg2, i64 6
%t51 = load i8, i8* %t50, align 1
%t52 = xor i8 %t51, %t49
store i8 %t52, i8* %t50, align 1
%t53 = load i8, i8* %t11, align 1
%t54 = getelementptr inbounds i8, i8* %arg2, i64 7
%t55 = load i8, i8* %t54, align 1
%t56 = xor i8 %t55, %t53
store i8 %t56, i8* %t54, align 1
%t57 = load i8, i8* %t12, align 1
%t58 = getelementptr inbounds i8, i8* %arg2, i64 8
%t59 = load i8, i8* %t58, align 1
%t60 = xor i8 %t59, %t57
store i8 %t60, i8* %t58, align 1
%t61 = load i8, i8* %t13, align 1
%t62 = getelementptr inbounds i8, i8* %arg2, i64 9
%t63 = load i8, i8* %t62, align 1
%t64 = xor i8 %t63, %t61
store i8 %t64, i8* %t62, align 1
%t65 = load i8, i8* %t14, align 1
%t66 = getelementptr inbounds i8, i8* %arg2, i64 10
%t67 = load i8, i8* %t66, align 1
%t68 = xor i8 %t67, %t65
store i8 %t68, i8* %t66, align 1
%t69 = load i8, i8* %t15, align 1
%t70 = getelementptr inbounds i8, i8* %arg2, i64 11
%t71 = load i8, i8* %t70, align 1
%t72 = xor i8 %t71, %t69
store i8 %t72, i8* %t70, align 1
%t73 = load i8, i8* %t16, align 1
%t74 = getelementptr inbounds i8, i8* %arg2, i64 12
%t75 = load i8, i8* %t74, align 1
%t76 = xor i8 %t75, %t73
store i8 %t76, i8* %t74, align 1
%t77 = load i8, i8* %t17, align 1
%t78 = getelementptr inbounds i8, i8* %arg2, i64 13
%t79 = load i8, i8* %t78, align 1
%t80 = xor i8 %t79, %t77
store i8 %t80, i8* %t78, align 1
%t81 = load i8, i8* %t18, align 1
%t82 = getelementptr inbounds i8, i8* %arg2, i64 14
%t83 = load i8, i8* %t82, align 1
%t84 = xor i8 %t83, %t81
store i8 %t84, i8* %t82, align 1
%t85 = load i8, i8* %t21, align 1
%t86 = getelementptr inbounds i8, i8* %arg2, i64 15
%t87 = load i8, i8* %t86, align 1
%t88 = xor i8 %t87, %t85
store i8 %t88, i8* %t86, align 1
ret i32 1
}
; A test case where instructions required to compute the pointer bounds get
; vectorized before versioning. Make sure there is no crash.
;
; Block %bb stores two select results to a[2] and a[3]; the assertions below
; expect them to become one <2 x i32> store of the constants <0, 10>. Block
; %bb18 then uses the first select result (%t10) to index the pointer loaded
; from %ptr, and converts four bytes to floats. The assertions expect %bb18
; to stay scalar, with %t10 replaced by the constant 0.
define void @crash_instructions_deleted(float* %t, i32* %a, i32** noalias %ptr) {
; CHECK-LABEL: @crash_instructions_deleted(
; CHECK-NEXT: bb:
; CHECK-NEXT: [[T15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 2
; CHECK-NEXT: [[T16:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 3
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[T15]] to <2 x i32>*
; CHECK-NEXT: store <2 x i32> <i32 0, i32 10>, <2 x i32>* [[TMP0]], align 8
; CHECK-NEXT: [[T17:%.*]] = load i32*, i32** [[PTR:%.*]], align 8
; CHECK-NEXT: br label [[BB18:%.*]]
; CHECK: bb18:
; CHECK-NEXT: [[T19:%.*]] = sext i32 0 to i64
; CHECK-NEXT: [[T20:%.*]] = add nsw i64 1, [[T19]]
; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds i32, i32* [[T17]], i64 [[T20]]
; CHECK-NEXT: [[T22:%.*]] = bitcast i32* [[T21]] to i8*
; CHECK-NEXT: [[T23:%.*]] = getelementptr inbounds i8, i8* [[T22]], i64 1
; CHECK-NEXT: [[T24:%.*]] = getelementptr inbounds i8, i8* [[T22]], i64 2
; CHECK-NEXT: [[T25:%.*]] = getelementptr inbounds i8, i8* [[T22]], i64 3
; CHECK-NEXT: [[T26:%.*]] = load i8, i8* [[T22]], align 1
; CHECK-NEXT: [[T27:%.*]] = uitofp i8 [[T26]] to float
; CHECK-NEXT: [[T28:%.*]] = fdiv float [[T27]], 2.550000e+02
; CHECK-NEXT: [[T29:%.*]] = getelementptr inbounds float, float* [[T:%.*]], i64 0
; CHECK-NEXT: store float [[T28]], float* [[T29]], align 8
; CHECK-NEXT: [[T30:%.*]] = load i8, i8* [[T23]], align 1
; CHECK-NEXT: [[T31:%.*]] = uitofp i8 [[T30]] to float
; CHECK-NEXT: [[T32:%.*]] = fdiv float [[T31]], 2.550000e+02
; CHECK-NEXT: [[T33:%.*]] = getelementptr inbounds float, float* [[T]], i64 1
; CHECK-NEXT: store float [[T32]], float* [[T33]], align 4
; CHECK-NEXT: [[T34:%.*]] = load i8, i8* [[T24]], align 1
; CHECK-NEXT: [[T35:%.*]] = uitofp i8 [[T34]] to float
; CHECK-NEXT: [[T36:%.*]] = fdiv float [[T35]], 2.550000e+02
; CHECK-NEXT: [[T37:%.*]] = getelementptr inbounds float, float* [[T]], i64 2
; CHECK-NEXT: store float [[T36]], float* [[T37]], align 8
; CHECK-NEXT: [[T38:%.*]] = load i8, i8* [[T25]], align 1
; CHECK-NEXT: [[T39:%.*]] = uitofp i8 [[T38]] to float
; CHECK-NEXT: [[T40:%.*]] = fdiv float [[T39]], 2.550000e+02
; CHECK-NEXT: [[T41:%.*]] = getelementptr inbounds float, float* [[T]], i64 3
; CHECK-NEXT: store float [[T40]], float* [[T41]], align 4
; CHECK-NEXT: ret void
;
bb:
; All four compares have constant operands and are false (10 < 0, 20 > 20),
; so %t10 selects 0 and %t14 selects 10.
%t6 = icmp slt i32 10, 0
%t7 = icmp sgt i32 20, 20
%t9 = select i1 %t7, i32 5, i32 0
%t10 = select i1 %t6, i32 0, i32 %t9
%t11 = icmp slt i32 10, 0
%t12 = icmp sgt i32 20, 20
%t13 = select i1 %t12, i32 5, i32 10
%t14 = select i1 %t11, i32 0, i32 %t13
; Two adjacent i32 stores: a[2] = %t10 and a[3] = %t14.
%t15 = getelementptr inbounds i32, i32* %a, i32 2
store i32 %t10, i32* %t15, align 8
%t16 = getelementptr inbounds i32, i32* %a, i32 3
store i32 %t14, i32* %t16, align 4
%t17 = load i32*, i32** %ptr, align 8
br label %bb18
bb18: ; preds = %bb
; The address of the byte loads depends on %t10, which is defined in %bb:
; %t21 = &%t17[1 + sext(%t10)].
%t19 = sext i32 %t10 to i64
%t20 = add nsw i64 1, %t19
%t21 = getelementptr inbounds i32, i32* %t17, i64 %t20
%t22 = bitcast i32* %t21 to i8*
%t23 = getelementptr inbounds i8, i8* %t22, i64 1
%t24 = getelementptr inbounds i8, i8* %t22, i64 2
%t25 = getelementptr inbounds i8, i8* %t22, i64 3
; For i = 0..3: t[i] = uitofp(byte i at %t22) / 255.0.
%t26 = load i8, i8* %t22, align 1
%t27 = uitofp i8 %t26 to float
%t28 = fdiv float %t27, 2.550000e+02
%t29 = getelementptr inbounds float, float* %t, i64 0
store float %t28, float* %t29, align 8
%t30 = load i8, i8* %t23, align 1
%t31 = uitofp i8 %t30 to float
%t32 = fdiv float %t31, 2.550000e+02
%t33 = getelementptr inbounds float, float* %t, i64 1
store float %t32, float* %t33, align 4
%t34 = load i8, i8* %t24, align 1
%t35 = uitofp i8 %t34 to float
%t36 = fdiv float %t35, 2.550000e+02
%t37 = getelementptr inbounds float, float* %t, i64 2
store float %t36, float* %t37, align 8
%t38 = load i8, i8* %t25, align 1
%t39 = uitofp i8 %t38 to float
%t40 = fdiv float %t39, 2.550000e+02
%t41 = getelementptr inbounds float, float* %t, i64 3
store float %t40, float* %t41, align 4
ret void
}
; A test case where there are no instructions accessing a tracked object in a
; block for which versioning was requested.
;
; The two adjacent stores to arg.3[0] and arg.3[1] sit in %bb36, but the values
; they store come from phis in %bb30 whose inputs are computed in %bb22 and
; %entry. The assertions below expect the function to come out unchanged,
; apart from an explicit 'align 8' printed on the first load.
define void @crash_no_tracked_instructions(float** %arg, float* %arg.2, float* %arg.3, i1 %c) {
; CHECK-LABEL: @crash_no_tracked_instructions(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[T19:%.*]] = load float*, float** [[ARG:%.*]], align 8
; CHECK-NEXT: [[T20:%.*]] = load float, float* [[ARG_3:%.*]], align 4
; CHECK-NEXT: [[T21:%.*]] = getelementptr inbounds float, float* [[ARG_2:%.*]], i64 0
; CHECK-NEXT: br i1 [[C:%.*]], label [[BB22:%.*]], label [[BB30:%.*]]
; CHECK: bb22:
; CHECK-NEXT: [[T23:%.*]] = fmul float [[T20]], 9.900000e+01
; CHECK-NEXT: [[T24:%.*]] = fmul float [[T23]], 9.900000e+01
; CHECK-NEXT: [[T25:%.*]] = getelementptr inbounds float, float* [[T19]], i64 2
; CHECK-NEXT: [[T26:%.*]] = fmul float [[T23]], 1.000000e+01
; CHECK-NEXT: store float [[T26]], float* [[T25]], align 4
; CHECK-NEXT: [[T27:%.*]] = load float, float* [[T21]], align 8
; CHECK-NEXT: [[T28:%.*]] = fadd float [[T24]], 2.000000e+01
; CHECK-NEXT: [[T29:%.*]] = fadd float [[T26]], 2.000000e+01
; CHECK-NEXT: br label [[BB30]]
; CHECK: bb30:
; CHECK-NEXT: [[T31:%.*]] = phi float [ [[T28]], [[BB22]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[T32:%.*]] = phi float [ [[T29]], [[BB22]] ], [ [[T20]], [[ENTRY]] ]
; CHECK-NEXT: br label [[BB36:%.*]]
; CHECK: bb36:
; CHECK-NEXT: [[T37:%.*]] = fmul float [[T31]], 3.000000e+00
; CHECK-NEXT: [[T38:%.*]] = getelementptr inbounds float, float* [[ARG_3]], i64 0
; CHECK-NEXT: store float [[T37]], float* [[T38]], align 4
; CHECK-NEXT: [[T39:%.*]] = fmul float [[T32]], 3.000000e+00
; CHECK-NEXT: [[T40:%.*]] = getelementptr inbounds float, float* [[ARG_3]], i64 1
; CHECK-NEXT: store float [[T39]], float* [[T40]], align 4
; CHECK-NEXT: br label [[BB41:%.*]]
; CHECK: bb41:
; CHECK-NEXT: ret void
;
entry:
; Load a float pointer from %arg and a float from %arg.3, then branch on %c.
%t19 = load float*, float** %arg
%t20 = load float, float* %arg.3, align 4
%t21 = getelementptr inbounds float, float* %arg.2, i64 0
br i1 %c, label %bb22, label %bb30
bb22:
; Conditional block: stores through the loaded pointer %t19 and loads from
; %arg.2. The loaded value %t27 has no users in this function.
%t23 = fmul float %t20, 99.0
%t24 = fmul float %t23, 99.0
%t25 = getelementptr inbounds float, float* %t19, i64 2
%t26 = fmul float %t23, 10.0
store float %t26, float* %t25, align 4
%t27 = load float, float* %t21, align 8
%t28 = fadd float %t24, 20.0
%t29 = fadd float %t26, 20.0
br label %bb30
bb30:
; Merge point: pick the %bb22 results, or 0.0 and %t20 when coming from %entry.
%t31 = phi float [ %t28, %bb22 ], [ 0.0, %entry ]
%t32 = phi float [ %t29, %bb22 ], [ %t20, %entry ]
br label %bb36
bb36:
; arg.3[0] = %t31 * 3.0 and arg.3[1] = %t32 * 3.0.
%t37 = fmul float %t31, 3.0
%t38 = getelementptr inbounds float, float* %arg.3, i64 0
store float %t37, float* %t38, align 4
%t39 = fmul float %t32, 3.0
%t40 = getelementptr inbounds float, float* %arg.3, i64 1
store float %t39, float* %t40, align 4
br label %bb41
bb41:
ret void
}