; blob: 72f87526c1d4a123b41bc52509af3f9d8ef96472 [file] [log] [blame]
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s
target triple = "arm64-apple-darwin"
; Make sure we can vectorize a loop that uses a function to clamp a double to
; be between a given minimum and maximum value.
; Clamp helper called from @loop.
;   - returns 0.0 when %v is (ordered) less than 0.0
;   - returns 6.0 when %v is (ordered) greater than 6.0
;   - returns %v unchanged otherwise; a NaN input fails both ordered
;     comparisons and therefore also takes this path
; The body is intentionally in unoptimized form: the argument and the result
; are kept in allocas (%v.addr, %retval) and every path funnels through the
; single %return block, so the optimizer pipeline under test has to promote
; the slots, simplify the control flow and inline the function itself.
define internal double @clamp(double %v) {
entry:
%retval = alloca double, align 8
%v.addr = alloca double, align 8
; Spill the argument; each use below reloads it from %v.addr.
store double %v, double* %v.addr, align 8
%0 = load double, double* %v.addr, align 8
; Lower-bound test: v < 0.0 (ordered, so false for NaN).
%cmp = fcmp olt double %0, 0.000000e+00
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
; Below the lower bound: result is the lower bound 0.0.
store double 0.000000e+00, double* %retval, align 8
br label %return
if.end: ; preds = %entry
%1 = load double, double* %v.addr, align 8
; Upper-bound test: v > 6.0 (ordered, so false for NaN).
%cmp1 = fcmp ogt double %1, 6.000000e+00
br i1 %cmp1, label %if.then2, label %if.end3
if.then2: ; preds = %if.end
; Above the upper bound: result is the upper bound 6.0.
store double 6.000000e+00, double* %retval, align 8
br label %return
if.end3: ; preds = %if.end
; Neither bound test fired: pass the input through unchanged.
%2 = load double, double* %v.addr, align 8
store double %2, double* %retval, align 8
br label %return
return: ; preds = %if.end3, %if.then2, %if.then
; Common exit: reload whichever value was stored to %retval and return it.
%3 = load double, double* %retval, align 8
ret double %3
}
; Function under test: for each i32 index i in [0, 20000) it stores
; clamp(Y[i]) into X[i].
;
; The autogenerated FileCheck assertions at the top of the body describe the
; expected optimized output:
;   - an entry block that compares X and Y against X+20000 / Y+20000 and
;     branches to the scalar loop when the two ranges may overlap;
;   - a vector loop that processes two <2 x double> vectors per iteration
;     (the index advances by 4), with @clamp inlined as two fcmp + two select
;     operations per vector;
;   - a scalar fallback loop with @clamp inlined as fcmp/select.
; Do not edit the assertion lines by hand; regenerate them with the script
; named in the NOTE at the top of the file.
define void @loop(double* %X, double* %Y) {
; CHECK-LABEL: @loop(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 20000
; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr double, double* [[Y:%.*]], i64 20000
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP9]], [[X]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INDEX]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2
; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8, !alias.scope !0
; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD11]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], <double 6.000000e+00, double 6.000000e+00>
; CHECK-NEXT: [[TMP8:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD11]], <double 6.000000e+00, double 6.000000e+00>
; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD11]]
; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP9]]
; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> zeroinitializer, <2 x double> [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 2
; CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP15]] to <2 x double>*
; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP16]], align 8, !alias.scope !3, !noalias !0
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000
; CHECK-NEXT: br i1 [[TMP17]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_05]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[IDXPROM]]
; CHECK-NEXT: [[TMP18:%.*]] = load double, double* [[ARRAYIDX]], align 8
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt double [[TMP18]], 0.000000e+00
; CHECK-NEXT: [[CMP1_I:%.*]] = fcmp ogt double [[TMP18]], 6.000000e+00
; CHECK-NEXT: [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP18]]
; CHECK-NEXT: [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]]
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[IDXPROM]]
; CHECK-NEXT: store double [[RETVAL_0_I]], double* [[ARRAYIDX2]], align 8
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_05]], 19999
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP7:![0-9]+]]
;
entry:
; Unoptimized input: both pointer arguments and the induction variable %i
; live in stack slots and are reloaded at every use.
%X.addr = alloca double*, align 8
%Y.addr = alloca double*, align 8
%i = alloca i32, align 4
store double* %X, double** %X.addr, align 8
store double* %Y, double** %Y.addr, align 8
%0 = bitcast i32* %i to i8*
; Mark the start of the lifetime of the 4-byte slot %i.
; NOTE(review): attribute group #2 is not defined in the visible part of this
; file - confirm whether a definition is expected.
call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #2
; i = 0
store i32 0, i32* %i, align 4
br label %for.cond
for.cond: ; preds = %for.inc, %entry
; Loop header: continue while i < 20000 (unsigned compare).
%1 = load i32, i32* %i, align 4
%cmp = icmp ult i32 %1, 20000
br i1 %cmp, label %for.body, label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond
; Loop exit: end the lifetime of %i before leaving.
%2 = bitcast i32* %i to i8*
call void @llvm.lifetime.end.p0i8(i64 4, i8* %2) #2
br label %for.end
for.body: ; preds = %for.cond
; Load Y[i]; the i32 index is zero-extended to i64 for addressing.
%3 = load double*, double** %Y.addr, align 8
%4 = load i32, i32* %i, align 4
%idxprom = zext i32 %4 to i64
%arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom
%5 = load double, double* %arrayidx, align 8
; Clamp the loaded value; this call must be inlined for the loop to vectorize
; into the form the assertions above expect.
%call = call double @clamp(double %5)
; Store the clamped value to X[i].
%6 = load double*, double** %X.addr, align 8
%7 = load i32, i32* %i, align 4
%idxprom1 = zext i32 %7 to i64
%arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1
store double %call, double* %arrayidx2, align 8
br label %for.inc
for.inc: ; preds = %for.body
; i = i + 1 (plain add, no nuw/nsw flags in the input).
%8 = load i32, i32* %i, align 4
%inc = add i32 %8, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond.cleanup
ret void
}
; Lifetime marker intrinsics; @loop calls them to bracket the live range of
; its 4-byte induction-variable stack slot.
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture)
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture)