|  | ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py | 
|  | ; RUN: opt -S -passes=loop-vectorize -mtriple=x86_64-apple-darwin %s | FileCheck %s --check-prefixes=CHECK,SSE | 
|  | ; RUN: opt -S -passes=loop-vectorize -mtriple=x86_64-apple-darwin -mattr=+avx %s | FileCheck %s --check-prefixes=CHECK,AVX | 
|  |  | 
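|  | ; Two nearly identical functions. The only difference is the presence of | 
|  | ; 'fast' fast-math flags on the fcmp and fadd of the second (@sumIfVector). | 
|  | ; The first (@sumIfScalar) is expected to stay scalar, the second to be | 
|  | ; vectorized. The loop is a pretty simple conditional reduction: | 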
|  |  | 
|  | ; for (int i = 0; i < 32; ++i) | 
|  | ;   if (arr[i] != 42) | 
|  | ;     tot += arr[i]; | 
|  |  | 
; @sumIfScalar: sums those of the 32 doubles at %arr that compare 'une'
; against 42.0. Neither the fcmp nor the fadd carries fast-math flags. The
; assertions below (shared by both opt invocations) expect the loop to come
; back from loop-vectorize in scalar form, with no vector.ph / vector.body.
|  | define double @sumIfScalar(ptr nocapture readonly %arr) { | 
|  | ; CHECK-LABEL: @sumIfScalar( | 
|  | ; CHECK-NEXT:  entry: | 
|  | ; CHECK-NEXT:    br label [[LOOP:%.*]] | 
|  | ; CHECK:       loop: | 
|  | ; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] | 
|  | ; CHECK-NEXT:    [[TOT:%.*]] = phi double [ 0.000000e+00, [[ENTRY]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] | 
|  | ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[I]] | 
|  | ; CHECK-NEXT:    [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 | 
|  | ; CHECK-NEXT:    [[TST:%.*]] = fcmp une double [[NEXTVAL]], 4.200000e+01 | 
|  | ; CHECK-NEXT:    br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] | 
|  | ; CHECK:       do.add: | 
|  | ; CHECK-NEXT:    [[TOT_NEW:%.*]] = fadd double [[TOT]], [[NEXTVAL]] | 
|  | ; CHECK-NEXT:    br label [[NEXT_ITER]] | 
|  | ; CHECK:       no.add: | 
|  | ; CHECK-NEXT:    br label [[NEXT_ITER]] | 
|  | ; CHECK:       next.iter: | 
|  | ; CHECK-NEXT:    [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] | 
|  | ; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1 | 
|  | ; CHECK-NEXT:    [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 | 
|  | ; CHECK-NEXT:    br i1 [[AGAIN]], label [[LOOP]], label [[DONE:%.*]] | 
|  | ; CHECK:       done: | 
|  | ; CHECK-NEXT:    [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ] | 
|  | ; CHECK-NEXT:    ret double [[TOT_NEXT_LCSSA]] | 
|  | ; | 
|  | entry: | 
|  | br label %loop | 
|  |  | 
|  | loop: | 
; %i is the induction variable and %tot the running sum; both start at zero.
|  | %i = phi i32 [0, %entry], [%i.next, %next.iter] | 
|  | %tot = phi double [0.0, %entry], [%tot.next, %next.iter] | 
|  |  | 
; Load element %i of the array.
|  | %addr = getelementptr double, ptr %arr, i32 %i | 
|  | %nextval = load double, ptr %addr | 
|  |  | 
; 'une' is true when %nextval is unordered with, or not equal to, 42.0.
|  | %tst = fcmp une double %nextval, 42.0 | 
|  | br i1 %tst, label %do.add, label %no.add | 
|  |  | 
|  | do.add: | 
; Accumulate the element; this fadd has no fast-math flags.
|  | %tot.new = fadd double %tot, %nextval | 
|  | br label %next.iter | 
|  |  | 
|  | no.add: | 
; Element skipped: the sum is passed through unchanged.
|  | br label %next.iter | 
|  |  | 
|  | next.iter: | 
; Merge the updated or unchanged sum, step %i, and loop while %i.next < 32
; (unsigned compare), i.e. the body runs for %i = 0..31.
|  | %tot.next = phi double [%tot, %no.add], [%tot.new, %do.add] | 
|  | %i.next = add i32 %i, 1 | 
|  | %again = icmp ult i32 %i.next, 32 | 
|  | br i1 %again, label %loop, label %done | 
|  |  | 
|  | done: | 
; %tot.next is used outside the loop; the expected output routes it through
; an LCSSA phi before the ret.
|  | ret double %tot.next | 
|  | } | 
|  |  | 
; @sumIfVector: the same conditional-sum loop over 32 doubles, but here the
; fcmp and the fadd both carry the 'fast' flag. The assertions below expect
; loop-vectorize to transform the loop, with a shape that depends on the target:
;   * first opt invocation (no -mattr):  <2 x double>, 2 accumulators, index += 4
;   * second opt invocation (+avx):      <4 x double>, 4 accumulators, index += 16
; In both expected outputs the conditional add appears as an unconditional
; vector fadd followed by a select on the fcmp mask, and the accumulators are
; combined in middle.block with fadd plus a call to llvm.vector.reduce.fadd.
; The original scalar loop is still expected in the output, reached only via
; the constant-false branch in entry.
|  | define double @sumIfVector(ptr nocapture readonly %arr) { | 
|  | ; SSE-LABEL: @sumIfVector( | 
|  | ; SSE-NEXT:  entry: | 
|  | ; SSE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | 
|  | ; SSE:       vector.ph: | 
|  | ; SSE-NEXT:    br label [[VECTOR_BODY:%.*]] | 
|  | ; SSE:       vector.body: | 
|  | ; SSE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | 
|  | ; SSE-NEXT:    [[VEC_PHI:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] | 
|  | ; SSE-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ] | 
|  | ; SSE-NEXT:    [[TMP2:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[INDEX]] | 
|  | ; SSE-NEXT:    [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i32 2 | 
|  | ; SSE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP2]], align 8 | 
|  | ; SSE-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x double>, ptr [[TMP5]], align 8 | 
|  | ; SSE-NEXT:    [[TMP6:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) | 
|  | ; SSE-NEXT:    [[TMP7:%.*]] = fcmp fast une <2 x double> [[WIDE_LOAD2]], splat (double 4.200000e+01) | 
|  | ; SSE-NEXT:    [[TMP8:%.*]] = fadd fast <2 x double> [[VEC_PHI]], [[WIDE_LOAD]] | 
|  | ; SSE-NEXT:    [[TMP9:%.*]] = fadd fast <2 x double> [[VEC_PHI1]], [[WIDE_LOAD2]] | 
|  | ; SSE-NEXT:    [[PREDPHI]] = select <2 x i1> [[TMP6]], <2 x double> [[TMP8]], <2 x double> [[VEC_PHI]] | 
|  | ; SSE-NEXT:    [[PREDPHI3]] = select <2 x i1> [[TMP7]], <2 x double> [[TMP9]], <2 x double> [[VEC_PHI1]] | 
|  | ; SSE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 | 
|  | ; SSE-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 | 
|  | ; SSE-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] | 
|  | ; SSE:       middle.block: | 
|  | ; SSE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x double> [[PREDPHI3]], [[PREDPHI]] | 
|  | ; SSE-NEXT:    [[TMP11:%.*]] = call fast double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> [[BIN_RDX]]) | 
|  | ; SSE-NEXT:    br label [[DONE:%.*]] | 
|  | ; SSE:       scalar.ph: | 
|  | ; SSE-NEXT:    br label [[LOOP:%.*]] | 
|  | ; SSE:       loop: | 
|  | ; SSE-NEXT:    [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] | 
|  | ; SSE-NEXT:    [[TOT:%.*]] = phi double [ 0.000000e+00, [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] | 
|  | ; SSE-NEXT:    [[ADDR:%.*]] = getelementptr double, ptr [[ARR]], i32 [[I]] | 
|  | ; SSE-NEXT:    [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 | 
|  | ; SSE-NEXT:    [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 | 
|  | ; SSE-NEXT:    br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] | 
|  | ; SSE:       do.add: | 
|  | ; SSE-NEXT:    [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] | 
|  | ; SSE-NEXT:    br label [[NEXT_ITER]] | 
|  | ; SSE:       no.add: | 
|  | ; SSE-NEXT:    br label [[NEXT_ITER]] | 
|  | ; SSE:       next.iter: | 
|  | ; SSE-NEXT:    [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] | 
|  | ; SSE-NEXT:    [[I_NEXT]] = add i32 [[I]], 1 | 
|  | ; SSE-NEXT:    [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 | 
|  | ; SSE-NEXT:    br i1 [[AGAIN]], label [[LOOP]], label [[DONE]], !llvm.loop [[LOOP3:![0-9]+]] | 
|  | ; SSE:       done: | 
|  | ; SSE-NEXT:    [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] | 
|  | ; SSE-NEXT:    ret double [[TOT_NEXT_LCSSA]] | 
|  | ; | 
|  | ; AVX-LABEL: @sumIfVector( | 
|  | ; AVX-NEXT:  entry: | 
|  | ; AVX-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] | 
|  | ; AVX:       vector.ph: | 
|  | ; AVX-NEXT:    br label [[VECTOR_BODY:%.*]] | 
|  | ; AVX:       vector.body: | 
|  | ; AVX-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] | 
|  | ; AVX-NEXT:    [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] | 
|  | ; AVX-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI7:%.*]], [[VECTOR_BODY]] ] | 
|  | ; AVX-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI8:%.*]], [[VECTOR_BODY]] ] | 
|  | ; AVX-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x double> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI9:%.*]], [[VECTOR_BODY]] ] | 
|  | ; AVX-NEXT:    [[TMP4:%.*]] = getelementptr double, ptr [[ARR:%.*]], i32 [[INDEX]] | 
|  | ; AVX-NEXT:    [[TMP9:%.*]] = getelementptr double, ptr [[TMP4]], i32 4 | 
|  | ; AVX-NEXT:    [[TMP10:%.*]] = getelementptr double, ptr [[TMP4]], i32 8 | 
|  | ; AVX-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[TMP4]], i32 12 | 
|  | ; AVX-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP4]], align 8 | 
|  | ; AVX-NEXT:    [[WIDE_LOAD4:%.*]] = load <4 x double>, ptr [[TMP9]], align 8 | 
|  | ; AVX-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x double>, ptr [[TMP10]], align 8 | 
|  | ; AVX-NEXT:    [[WIDE_LOAD6:%.*]] = load <4 x double>, ptr [[TMP11]], align 8 | 
|  | ; AVX-NEXT:    [[TMP12:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD]], splat (double 4.200000e+01) | 
|  | ; AVX-NEXT:    [[TMP13:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD4]], splat (double 4.200000e+01) | 
|  | ; AVX-NEXT:    [[TMP14:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD5]], splat (double 4.200000e+01) | 
|  | ; AVX-NEXT:    [[TMP15:%.*]] = fcmp fast une <4 x double> [[WIDE_LOAD6]], splat (double 4.200000e+01) | 
|  | ; AVX-NEXT:    [[TMP16:%.*]] = fadd fast <4 x double> [[VEC_PHI]], [[WIDE_LOAD]] | 
|  | ; AVX-NEXT:    [[TMP17:%.*]] = fadd fast <4 x double> [[VEC_PHI1]], [[WIDE_LOAD4]] | 
|  | ; AVX-NEXT:    [[TMP18:%.*]] = fadd fast <4 x double> [[VEC_PHI2]], [[WIDE_LOAD5]] | 
|  | ; AVX-NEXT:    [[TMP19:%.*]] = fadd fast <4 x double> [[VEC_PHI3]], [[WIDE_LOAD6]] | 
|  | ; AVX-NEXT:    [[PREDPHI]] = select <4 x i1> [[TMP12]], <4 x double> [[TMP16]], <4 x double> [[VEC_PHI]] | 
|  | ; AVX-NEXT:    [[PREDPHI7]] = select <4 x i1> [[TMP13]], <4 x double> [[TMP17]], <4 x double> [[VEC_PHI1]] | 
|  | ; AVX-NEXT:    [[PREDPHI8]] = select <4 x i1> [[TMP14]], <4 x double> [[TMP18]], <4 x double> [[VEC_PHI2]] | 
|  | ; AVX-NEXT:    [[PREDPHI9]] = select <4 x i1> [[TMP15]], <4 x double> [[TMP19]], <4 x double> [[VEC_PHI3]] | 
|  | ; AVX-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 | 
|  | ; AVX-NEXT:    [[TMP20:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 | 
|  | ; AVX-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] | 
|  | ; AVX:       middle.block: | 
|  | ; AVX-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x double> [[PREDPHI7]], [[PREDPHI]] | 
|  | ; AVX-NEXT:    [[BIN_RDX10:%.*]] = fadd fast <4 x double> [[PREDPHI8]], [[BIN_RDX]] | 
|  | ; AVX-NEXT:    [[BIN_RDX11:%.*]] = fadd fast <4 x double> [[PREDPHI9]], [[BIN_RDX10]] | 
|  | ; AVX-NEXT:    [[TMP21:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[BIN_RDX11]]) | 
|  | ; AVX-NEXT:    br label [[DONE:%.*]] | 
|  | ; AVX:       scalar.ph: | 
|  | ; AVX-NEXT:    br label [[LOOP:%.*]] | 
|  | ; AVX:       loop: | 
|  | ; AVX-NEXT:    [[I:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[NEXT_ITER:%.*]] ] | 
|  | ; AVX-NEXT:    [[TOT:%.*]] = phi double [ 0.000000e+00, [[SCALAR_PH]] ], [ [[TOT_NEXT:%.*]], [[NEXT_ITER]] ] | 
|  | ; AVX-NEXT:    [[ADDR:%.*]] = getelementptr double, ptr [[ARR]], i32 [[I]] | 
|  | ; AVX-NEXT:    [[NEXTVAL:%.*]] = load double, ptr [[ADDR]], align 8 | 
|  | ; AVX-NEXT:    [[TST:%.*]] = fcmp fast une double [[NEXTVAL]], 4.200000e+01 | 
|  | ; AVX-NEXT:    br i1 [[TST]], label [[DO_ADD:%.*]], label [[NO_ADD:%.*]] | 
|  | ; AVX:       do.add: | 
|  | ; AVX-NEXT:    [[TOT_NEW:%.*]] = fadd fast double [[TOT]], [[NEXTVAL]] | 
|  | ; AVX-NEXT:    br label [[NEXT_ITER]] | 
|  | ; AVX:       no.add: | 
|  | ; AVX-NEXT:    br label [[NEXT_ITER]] | 
|  | ; AVX:       next.iter: | 
|  | ; AVX-NEXT:    [[TOT_NEXT]] = phi double [ [[TOT]], [[NO_ADD]] ], [ [[TOT_NEW]], [[DO_ADD]] ] | 
|  | ; AVX-NEXT:    [[I_NEXT]] = add i32 [[I]], 1 | 
|  | ; AVX-NEXT:    [[AGAIN:%.*]] = icmp ult i32 [[I_NEXT]], 32 | 
|  | ; AVX-NEXT:    br i1 [[AGAIN]], label [[LOOP]], label [[DONE]], !llvm.loop [[LOOP3:![0-9]+]] | 
|  | ; AVX:       done: | 
|  | ; AVX-NEXT:    [[TOT_NEXT_LCSSA:%.*]] = phi double [ [[TOT_NEXT]], [[NEXT_ITER]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ] | 
|  | ; AVX-NEXT:    ret double [[TOT_NEXT_LCSSA]] | 
|  | ; | 
|  | entry: | 
|  | br label %loop | 
|  |  | 
|  | loop: | 
; %i counts iterations from 0; %tot carries the partial sum around the back edge.
|  | %i = phi i32 [0, %entry], [%i.next, %next.iter] | 
|  | %tot = phi double [0.0, %entry], [%tot.next, %next.iter] | 
|  |  | 
; Fetch the %i-th double from %arr.
|  | %addr = getelementptr double, ptr %arr, i32 %i | 
|  | %nextval = load double, ptr %addr | 
|  |  | 
; Same 'une' test against 42.0 as in the scalar variant, here marked 'fast'.
|  | %tst = fcmp fast une double %nextval, 42.0 | 
|  | br i1 %tst, label %do.add, label %no.add | 
|  |  | 
|  | do.add: | 
; The 'fast' flag on this reduction fadd is the difference the test exercises.
|  | %tot.new = fadd fast double %tot, %nextval | 
|  | br label %next.iter | 
|  |  | 
|  | no.add: | 
; Nothing to add for this element.
|  | br label %next.iter | 
|  |  | 
|  | next.iter: | 
; Select the sum for the next iteration, advance %i, and continue while
; %i.next < 32 (unsigned compare).
|  | %tot.next = phi double [%tot, %no.add], [%tot.new, %do.add] | 
|  | %i.next = add i32 %i, 1 | 
|  | %again = icmp ult i32 %i.next, 32 | 
|  | br i1 %again, label %loop, label %done | 
|  |  | 
|  | done: | 
; Return the final sum; in the expected output this value comes from a phi
; merging the scalar-loop result with the reduced vector result.
|  | ret double %tot.next | 
|  | } | 
|  |  |