| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| ; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s |
| |
| target triple = "arm64-apple-darwin" |
| |
| ; Make sure we can vectorize a loop that uses a function to clamp a double to |
| ; be between a given minimum and maximum value. |
| |
| ; Clamp %v to the closed range [0.0, 6.0]. |
| ; Returns 0.0 when %v is ordered-less-than 0.0, 6.0 when %v is |
| ; ordered-greater-than 6.0, and %v itself otherwise. Both comparisons are |
| ; ordered, so a NaN input fails both and is returned unchanged. |
| ; The argument and the result are routed through stack slots |
| ; (alloca/load/store) rather than SSA values. |
| define internal double @clamp(double %v) { |
| entry: |
| %result.slot = alloca double, align 8 |
| %v.slot = alloca double, align 8 |
| store double %v, double* %v.slot, align 8 |
| ; Lower bound: is %v below 0.0? |
| %v.lo = load double, double* %v.slot, align 8 |
| %below.min = fcmp olt double %v.lo, 0.000000e+00 |
| br i1 %below.min, label %clamp.low, label %check.high |
| |
| clamp.low: ; preds = %entry |
| ; Result is the minimum, 0.0. |
| store double 0.000000e+00, double* %result.slot, align 8 |
| br label %done |
| |
| check.high: ; preds = %entry |
| ; Upper bound: is %v above 6.0? |
| %v.hi = load double, double* %v.slot, align 8 |
| %above.max = fcmp ogt double %v.hi, 6.000000e+00 |
| br i1 %above.max, label %clamp.high, label %in.range |
| |
| clamp.high: ; preds = %check.high |
| ; Result is the maximum, 6.0. |
| store double 6.000000e+00, double* %result.slot, align 8 |
| br label %done |
| |
| in.range: ; preds = %check.high |
| ; Neither bound was exceeded: pass the input through. |
| %v.pass = load double, double* %v.slot, align 8 |
| store double %v.pass, double* %result.slot, align 8 |
| br label %done |
| |
| done: ; preds = %in.range, %clamp.high, %clamp.low |
| ; Single exit: return whatever was written to the result slot. |
| %result = load double, double* %result.slot, align 8 |
| ret double %result |
| } |
| |
| ; Test entry point: for every i in [0, 20000), stores clamp(Y[i]) into X[i]. |
| ; The input keeps X, Y and the i32 counter in stack slots. The autogenerated |
| ; assertions below expect the O3 pipeline to produce: |
| ;   - a runtime overlap test on the 20000-element ranges of X and Y, |
| ;   - a <2 x double> vector loop that handles 4 elements per iteration, with |
| ;     the clamp expressed as vector compares and selects, |
| ;   - a scalar fallback loop (taken when the ranges may overlap) with the |
| ;     clamp inlined as two scalar selects. |
| define void @loop(double* %X, double* %Y) { |
| ; CHECK-LABEL: @loop( |
| ; CHECK-NEXT: entry: |
| ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 20000 |
| ; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr double, double* [[Y:%.*]], i64 20000 |
| ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP9]], [[X]] |
| ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]] |
| ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] |
| ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] |
| ; CHECK: vector.body: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] |
| ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INDEX]] to i64 |
| ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* |
| ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !alias.scope !0 |
| ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 |
| ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* |
| ; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8, !alias.scope !0 |
| ; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer |
| ; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD11]], zeroinitializer |
| ; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], <double 6.000000e+00, double 6.000000e+00> |
| ; CHECK-NEXT: [[TMP8:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD11]], <double 6.000000e+00, double 6.000000e+00> |
| ; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD11]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP9]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> zeroinitializer, <2 x double> [[TMP10]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[TMP0]] |
| ; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to <2 x double>* |
| ; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8, !alias.scope !3, !noalias !0 |
| ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 2 |
| ; CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP15]] to <2 x double>* |
| ; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP16]], align 8, !alias.scope !3, !noalias !0 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 |
| ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000 |
| ; CHECK-NEXT: br i1 [[TMP17]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] |
| ; CHECK: for.cond.cleanup: |
| ; CHECK-NEXT: ret void |
| ; CHECK: for.body: |
| ; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] |
| ; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_05]] to i64 |
| ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[IDXPROM]] |
| ; CHECK-NEXT: [[TMP18:%.*]] = load double, double* [[ARRAYIDX]], align 8 |
| ; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt double [[TMP18]], 0.000000e+00 |
| ; CHECK-NEXT: [[CMP1_I:%.*]] = fcmp ogt double [[TMP18]], 6.000000e+00 |
| ; CHECK-NEXT: [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP18]] |
| ; CHECK-NEXT: [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]] |
| ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[IDXPROM]] |
| ; CHECK-NEXT: store double [[RETVAL_0_I]], double* [[ARRAYIDX2]], align 8 |
| ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1 |
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_05]], 19999 |
| ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP7:![0-9]+]] |
| ; |
| ; Spill both pointer arguments to stack slots, open the lifetime of the 4-byte |
| ; counter slot %i, and initialize the counter to 0. |
| ; NOTE(review): the lifetime calls reference attribute group #2, which is not |
| ; defined in the visible part of this file - confirm it exists or is intended |
| ; to be empty. |
| entry: |
| %X.addr = alloca double*, align 8 |
| %Y.addr = alloca double*, align 8 |
| %i = alloca i32, align 4 |
| store double* %X, double** %X.addr, align 8 |
| store double* %Y, double** %Y.addr, align 8 |
| %0 = bitcast i32* %i to i8* |
| call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #2 |
| store i32 0, i32* %i, align 4 |
| br label %for.cond |
| |
| ; Loop header: keep iterating while i < 20000 (unsigned comparison). |
| for.cond: ; preds = %for.inc, %entry |
| %1 = load i32, i32* %i, align 4 |
| %cmp = icmp ult i32 %1, 20000 |
| br i1 %cmp, label %for.body, label %for.cond.cleanup |
| |
| ; Loop exit: close the lifetime of the counter slot before returning. |
| for.cond.cleanup: ; preds = %for.cond |
| %2 = bitcast i32* %i to i8* |
| call void @llvm.lifetime.end.p0i8(i64 4, i8* %2) #2 |
| br label %for.end |
| |
| ; Loop body: X[i] = clamp(Y[i]). The i32 counter is reloaded and zero-extended |
| ; to i64 separately for the load index and for the store index. |
| for.body: ; preds = %for.cond |
| %3 = load double*, double** %Y.addr, align 8 |
| %4 = load i32, i32* %i, align 4 |
| %idxprom = zext i32 %4 to i64 |
| %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom |
| %5 = load double, double* %arrayidx, align 8 |
| %call = call double @clamp(double %5) |
| %6 = load double*, double** %X.addr, align 8 |
| %7 = load i32, i32* %i, align 4 |
| %idxprom1 = zext i32 %7 to i64 |
| %arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1 |
| store double %call, double* %arrayidx2, align 8 |
| br label %for.inc |
| |
| ; Latch: i = i + 1 (the input add carries no nuw/nsw flags), then re-test. |
| for.inc: ; preds = %for.body |
| %8 = load i32, i32* %i, align 4 |
| %inc = add i32 %8, 1 |
| store i32 %inc, i32* %i, align 4 |
| br label %for.cond |
| |
| for.end: ; preds = %for.cond.cleanup |
| ret void |
| } |
| |
| ; Lifetime marker intrinsics. @loop calls them with size 4 on its i32 counter |
| ; slot %i: start in the entry block, end in the loop-exit block. |
| declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) |
| |
| declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) |