; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s
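
; These functions exercise horizontal reduction matching in the SLP vectorizer
; (-slp-vectorize-hor, -slp-vectorize-hor-store) over products of @arr and @arr1;
; the autogenerated CHECK lines record which parts of each reduction end up as
; <2 x float> operations on bdver2.
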
@n = external local_unnamed_addr global i32, align 4
@arr = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@arr1 = common local_unnamed_addr global [20 x float] zeroinitializer, align 16
@res = external local_unnamed_addr global float, align 4
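
; @baz: two fadd chains over the same four @arr*@arr1 products, each seeded by
; sitofp(3*n); the checks show only the last two products vectorized as <2 x float>.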
define float @baz() {
; CHECK-LABEL: @baz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
; CHECK-NEXT: [[MUL4:%.*]] = fmul fast float [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL4]], [[CONV]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
; CHECK-NEXT: [[MUL4_1:%.*]] = fmul fast float [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[MUL4_1]], [[ADD]]
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x float> [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x float> [[TMP7]], i32 0
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[TMP8]], [[ADD_1]]
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP7]], i32 1
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[TMP9]], [[ADD_2]]
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV]]
; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[MUL4]], [[ADD7]]
; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[MUL4_1]], [[ADD19]]
; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP8]], [[ADD19_1]]
; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP9]], [[ADD19_2]]
; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4
; CHECK-NEXT: ret float [[ADD19_3]]
;
entry:
%0 = load i32, i32* @n, align 4
%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
%mul4 = fmul fast float %2, %1
%add = fadd fast float %mul4, %conv
%3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
%4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
%mul4.1 = fmul fast float %4, %3
%add.1 = fadd fast float %mul4.1, %add
%5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
%6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
%mul4.2 = fmul fast float %6, %5
%add.2 = fadd fast float %mul4.2, %add.1
%7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
%8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
%mul4.3 = fmul fast float %8, %7
%add.3 = fadd fast float %mul4.3, %add.2
%add7 = fadd fast float %add.3, %conv
%add19 = fadd fast float %mul4, %add7
%add19.1 = fadd fast float %mul4.1, %add19
%add19.2 = fadd fast float %mul4.2, %add19.1
%add19.3 = fadd fast float %mul4.3, %add19.2
store float %add19.3, float* @res, align 4
ret float %add19.3
}
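
; @bazz: two back-to-back 4-wide product reductions, seeded by sitofp(3*n) and
; sitofp(n << 2); only the final pair of products is vectorized in the checks.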
define float @bazz() {
; CHECK-LABEL: @bazz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP0]], 3
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[MUL]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
; CHECK-NEXT: [[MUL4:%.*]] = fmul fast float [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[MUL4]], [[CONV]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
; CHECK-NEXT: [[MUL4_1:%.*]] = fmul fast float [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[ADD_1:%.*]] = fadd fast float [[MUL4_1]], [[ADD]]
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
; CHECK-NEXT: [[MUL4_2:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[ADD_2:%.*]] = fadd fast float [[MUL4_2]], [[ADD_1]]
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
; CHECK-NEXT: [[MUL4_3:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[ADD_3:%.*]] = fadd fast float [[MUL4_3]], [[ADD_2]]
; CHECK-NEXT: [[MUL5:%.*]] = shl nsw i32 [[TMP0]], 2
; CHECK-NEXT: [[CONV6:%.*]] = sitofp i32 [[MUL5]] to float
; CHECK-NEXT: [[ADD7:%.*]] = fadd fast float [[ADD_3]], [[CONV6]]
; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16
; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16
; CHECK-NEXT: [[MUL18:%.*]] = fmul fast float [[TMP10]], [[TMP9]]
; CHECK-NEXT: [[ADD19:%.*]] = fadd fast float [[MUL18]], [[ADD7]]
; CHECK-NEXT: [[TMP11:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4
; CHECK-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4
; CHECK-NEXT: [[MUL18_1:%.*]] = fmul fast float [[TMP12]], [[TMP11]]
; CHECK-NEXT: [[ADD19_1:%.*]] = fadd fast float [[MUL18_1]], [[ADD19]]
; CHECK-NEXT: [[TMP13:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP14:%.*]] = load <2 x float>, <2 x float>* bitcast (float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6) to <2 x float>*), align 8
; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x float> [[TMP14]], [[TMP13]]
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0
; CHECK-NEXT: [[ADD19_2:%.*]] = fadd fast float [[TMP16]], [[ADD19_1]]
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1
; CHECK-NEXT: [[ADD19_3:%.*]] = fadd fast float [[TMP17]], [[ADD19_2]]
; CHECK-NEXT: store float [[ADD19_3]], float* @res, align 4
; CHECK-NEXT: ret float [[ADD19_3]]
;
entry:
%0 = load i32, i32* @n, align 4
%mul = mul nsw i32 %0, 3
%conv = sitofp i32 %mul to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
%mul4 = fmul fast float %2, %1
%add = fadd fast float %mul4, %conv
%3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
%4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
%mul4.1 = fmul fast float %4, %3
%add.1 = fadd fast float %mul4.1, %add
%5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
%6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
%mul4.2 = fmul fast float %6, %5
%add.2 = fadd fast float %mul4.2, %add.1
%7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
%8 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
%mul4.3 = fmul fast float %8, %7
%add.3 = fadd fast float %mul4.3, %add.2
%mul5 = shl nsw i32 %0, 2
%conv6 = sitofp i32 %mul5 to float
%add7 = fadd fast float %add.3, %conv6
%9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 4), align 16
%10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 4), align 16
%mul18 = fmul fast float %10, %9
%add19 = fadd fast float %mul18, %add7
%11 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 5), align 4
%12 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 5), align 4
%mul18.1 = fmul fast float %12, %11
%add19.1 = fadd fast float %mul18.1, %add19
%13 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 6), align 8
%14 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 6), align 8
%mul18.2 = fmul fast float %14, %13
%add19.2 = fadd fast float %mul18.2, %add19.1
%15 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 7), align 4
%16 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 7), align 4
%mul18.3 = fmul fast float %16, %15
%add19.3 = fadd fast float %mul18.3, %add19.2
store float %add19.3, float* @res, align 4
ret float %add19.3
}
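
; @bazzz: a 4-wide product reduction scaled by sitofp(n) and stored to @res;
; the checks keep it scalar.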
define float @bazzz() {
; CHECK-LABEL: @bazzz(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float [[MUL_1]], [[MUL]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fadd fast float [[MUL_2]], [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
; CHECK-NEXT: [[MUL_3:%.*]] = fmul fast float [[TMP10]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[MUL_3]], [[TMP8]]
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[CONV]], [[TMP11]]
; CHECK-NEXT: store float [[TMP12]], float* @res, align 4
; CHECK-NEXT: ret float [[TMP12]]
;
entry:
%0 = load i32, i32* @n, align 4
%conv = sitofp i32 %0 to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
%mul = fmul fast float %2, %1
%3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
%4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
%mul.1 = fmul fast float %4, %3
%5 = fadd fast float %mul.1, %mul
%6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
%7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
%mul.2 = fmul fast float %7, %6
%8 = fadd fast float %mul.2, %5
%9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
%10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
%mul.3 = fmul fast float %10, %9
%11 = fadd fast float %mul.3, %8
%12 = fmul fast float %conv, %11
store float %12, float* @res, align 4
ret float %12
}
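
; @foo: the same reduction as @bazzz, but the result is converted back to i32
; and stored to @n.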
define i32 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @n, align 4
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP2]], [[TMP1]]
; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
; CHECK-NEXT: [[TMP4:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
; CHECK-NEXT: [[MUL_1:%.*]] = fmul fast float [[TMP4]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd fast float [[MUL_1]], [[MUL]]
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
; CHECK-NEXT: [[MUL_2:%.*]] = fmul fast float [[TMP7]], [[TMP6]]
; CHECK-NEXT: [[TMP8:%.*]] = fadd fast float [[MUL_2]], [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
; CHECK-NEXT: [[TMP10:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
; CHECK-NEXT: [[MUL_3:%.*]] = fmul fast float [[TMP10]], [[TMP9]]
; CHECK-NEXT: [[TMP11:%.*]] = fadd fast float [[MUL_3]], [[TMP8]]
; CHECK-NEXT: [[TMP12:%.*]] = fmul fast float [[CONV]], [[TMP11]]
; CHECK-NEXT: [[CONV4:%.*]] = fptosi float [[TMP12]] to i32
; CHECK-NEXT: store i32 [[CONV4]], i32* @n, align 4
; CHECK-NEXT: ret i32 [[CONV4]]
;
entry:
%0 = load i32, i32* @n, align 4
%conv = sitofp i32 %0 to float
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
%mul = fmul fast float %2, %1
%3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
%4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
%mul.1 = fmul fast float %4, %3
%5 = fadd fast float %mul.1, %mul
%6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
%7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
%mul.2 = fmul fast float %7, %6
%8 = fadd fast float %mul.2, %5
%9 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
%10 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
%mul.3 = fmul fast float %10, %9
%11 = fadd fast float %mul.3, %8
%12 = fmul fast float %conv, %11
%conv4 = fptosi float %12 to i32
store i32 %conv4, i32* @n, align 4
ret i32 %conv4
}
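
; @bar: an fmax-style reduction (fcmp ogt + select) over four products; the
; checks vectorize only the first pair of products.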
define float @bar() {
; CHECK-LABEL: @bar(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr to <2 x float>*), align 16
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* bitcast ([20 x float]* @arr1 to <2 x float>*), align 16
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT: [[CMP4:%.*]] = fcmp fast ogt float [[TMP3]], [[TMP4]]
; CHECK-NEXT: [[MAX_0_MUL3:%.*]] = select i1 [[CMP4]], float [[TMP3]], float [[TMP4]]
; CHECK-NEXT: [[TMP5:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
; CHECK-NEXT: [[MUL3_1:%.*]] = fmul fast float [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[CMP4_1:%.*]] = fcmp fast ogt float [[MAX_0_MUL3]], [[MUL3_1]]
; CHECK-NEXT: [[MAX_0_MUL3_1:%.*]] = select i1 [[CMP4_1]], float [[MAX_0_MUL3]], float [[MUL3_1]]
; CHECK-NEXT: [[TMP7:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
; CHECK-NEXT: [[TMP8:%.*]] = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
; CHECK-NEXT: [[MUL3_2:%.*]] = fmul fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[CMP4_2:%.*]] = fcmp fast ogt float [[MAX_0_MUL3_1]], [[MUL3_2]]
; CHECK-NEXT: [[MAX_0_MUL3_2:%.*]] = select i1 [[CMP4_2]], float [[MAX_0_MUL3_1]], float [[MUL3_2]]
; CHECK-NEXT: store float [[MAX_0_MUL3_2]], float* @res, align 4
; CHECK-NEXT: ret float [[MAX_0_MUL3_2]]
;
entry:
%0 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 0), align 16
%1 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 0), align 16
%mul = fmul fast float %1, %0
%2 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 1), align 4
%3 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 1), align 4
%mul3 = fmul fast float %3, %2
%cmp4 = fcmp fast ogt float %mul, %mul3
%max.0.mul3 = select i1 %cmp4, float %mul, float %mul3
%4 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 2), align 8
%5 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 2), align 8
%mul3.1 = fmul fast float %5, %4
%cmp4.1 = fcmp fast ogt float %max.0.mul3, %mul3.1
%max.0.mul3.1 = select i1 %cmp4.1, float %max.0.mul3, float %mul3.1
%6 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr, i64 0, i64 3), align 4
%7 = load float, float* getelementptr inbounds ([20 x float], [20 x float]* @arr1, i64 0, i64 3), align 4
%mul3.2 = fmul fast float %7, %6
%cmp4.2 = fcmp fast ogt float %max.0.mul3.1, %mul3.2
%max.0.mul3.2 = select i1 %cmp4.2, float %max.0.mul3.1, float %mul3.2
store float %max.0.mul3.2, float* @res, align 4
ret float %max.0.mul3.2
}